View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Iterator;
23  
24  import org.apache.maven.doxia.logging.Log;
25  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
26  import org.apache.maven.doxia.sink.impl.SinkEventElement;
27  import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
28  
29  /**
30   * Test for Xhtml5BaseParser.
31   */
32  public class Xhtml5BaseParserTest
33      extends AbstractParserTest
34  {
35      private Xhtml5BaseParser parser;
36      private final SinkEventTestingSink sink = new SinkEventTestingSink();
37  
38  
39      @Override
40      protected Parser createParser()
41      {
42          parser = new Xhtml5BaseParser();
43          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
44          return parser;
45      }
46  
47      @Override
48      protected String outputExtension()
49      {
50          return "xhtml";
51      }
52  
53      @Override
54      protected void setUp() throws Exception
55      {
56          super.setUp();
57  
58          parser = new Xhtml5BaseParser();
59          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
60          sink.reset();
61      }
62  
63      /** Test Doxia version. */
64      public void testDoxiaVersion()
65      {
66          assertNotNull( XhtmlBaseParser.doxiaVersion() );
67          assertFalse( "unknown".equals( XhtmlBaseParser.doxiaVersion() ) );
68      }
69  
70      /** @throws Exception  */
71      public void testHeadingEventsList()
72          throws Exception
73      {
74          String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
75  
76          parser.parse( text, sink );
77  
78          Iterator<SinkEventElement> it = sink.getEventList().iterator();
79  
80          assertEquals( "paragraph", it.next().getName() );
81          assertEquals( "section1", it.next().getName() );
82          assertEquals( "sectionTitle1", it.next().getName() );
83          assertEquals( "sectionTitle1_", it.next().getName() );
84          assertEquals( "section2", it.next().getName() );
85          assertEquals( "sectionTitle2", it.next().getName() );
86          assertEquals( "sectionTitle2_", it.next().getName() );
87          assertEquals( "section3", it.next().getName() );
88          assertEquals( "sectionTitle3", it.next().getName() );
89          assertEquals( "sectionTitle3_", it.next().getName() );
90          assertEquals( "section4", it.next().getName() );
91          assertEquals( "sectionTitle4", it.next().getName() );
92          assertEquals( "sectionTitle4_", it.next().getName() );
93          assertEquals( "section5", it.next().getName() );
94          assertEquals( "sectionTitle5", it.next().getName() );
95          assertEquals( "sectionTitle5_", it.next().getName() );
96          assertEquals( "section5_", it.next().getName() );
97          assertEquals( "section4_", it.next().getName() );
98          assertEquals( "section3_", it.next().getName() );
99          assertEquals( "section2_", it.next().getName() );
100         assertEquals( "section1_", it.next().getName() );
101         assertEquals( "section1", it.next().getName() );
102         assertEquals( "sectionTitle1", it.next().getName() );
103         assertEquals( "sectionTitle1_", it.next().getName() );
104         // this one is missing because we enclose everything in <p> which is not valid xhtml,
105         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
106         //assertEquals( "section1_", it.next().getName() );
107         assertEquals( "paragraph_", it.next().getName() );
108         assertFalse( it.hasNext() );
109     }
110 
111     /** @throws Exception  */
112     public void testNestedHeadingEventsList()
113         throws Exception
114     {
115         // DOXIA-241
116         String text = "<p><h2></h2><h6></h6><h3></h3></p>";
117 
118         parser.parse( text, sink );
119 
120         Iterator<SinkEventElement> it = sink.getEventList().iterator();
121 
122         assertEquals( "paragraph", it.next().getName() );
123         assertEquals( "section1", it.next().getName() );
124         assertEquals( "sectionTitle1", it.next().getName() );
125         assertEquals( "sectionTitle1_", it.next().getName() );
126 
127         assertEquals( "section2", it.next().getName() );
128         assertEquals( "section3", it.next().getName() );
129         assertEquals( "section4", it.next().getName() );
130 
131         assertEquals( "section5", it.next().getName() );
132         assertEquals( "sectionTitle5", it.next().getName() );
133         assertEquals( "sectionTitle5_", it.next().getName() );
134         assertEquals( "section5_", it.next().getName() );
135 
136         assertEquals( "section4_", it.next().getName() );
137         assertEquals( "section3_", it.next().getName() );
138         assertEquals( "section2_", it.next().getName() );
139 
140         assertEquals( "section2", it.next().getName() );
141         assertEquals( "sectionTitle2", it.next().getName() );
142         assertEquals( "sectionTitle2_", it.next().getName() );
143         // these two are missing because we enclose everything in <p> which is not valid xhtml,
144         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
145         //assertEquals( "section2_", it.next().getName() );
146         //assertEquals( "section1_", it.next().getName() );
147         assertEquals( "paragraph_", it.next().getName() );
148         assertFalse( it.hasNext() );
149     }
150 
151     /** @throws Exception  */
152     public void testFigureEventsList()
153         throws Exception
154     {
155         String text = "<img src=\"source\" title=\"caption\" />";
156 
157         parser.parse( text, sink );
158 
159         Iterator<SinkEventElement> it = sink.getEventList().iterator();
160 
161         assertEquals( "figureGraphics", it.next().getName() );
162         assertFalse( it.hasNext() );
163     }
164 
165     /** @throws Exception  */
166     public void testTableEventsList()
167         throws Exception
168     {
169         // TODO: table caption, see DOXIA-177
170 
171         String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
172 
173         parser.parse( text, sink );
174 
175         Iterator<SinkEventElement> it = sink.getEventList().iterator();
176 
177         assertEquals( "table", it.next().getName() );
178         assertEquals( "tableRows", it.next().getName() );
179         assertEquals( "tableRow", it.next().getName() );
180         assertEquals( "tableHeaderCell", it.next().getName() );
181         assertEquals( "text", it.next().getName() );
182         assertEquals( "tableHeaderCell_", it.next().getName() );
183         assertEquals( "tableRow_", it.next().getName() );
184         assertEquals( "tableRow", it.next().getName() );
185         assertEquals( "tableCell", it.next().getName() );
186         assertEquals( "text", it.next().getName() );
187         assertEquals( "tableCell_", it.next().getName() );
188         assertEquals( "tableRow_", it.next().getName() );
189         assertEquals( "tableRows_", it.next().getName() );
190         assertEquals( "table_", it.next().getName() );
191 
192         assertFalse( it.hasNext() );
193     }
194 
195     /** @throws Exception  */
196     public void testSignificantWhiteSpace()
197         throws Exception
198     {
199         // NOTE significant white space
200         String text = "<p><b>word</b> <i>word</i></p>";
201 
202         parser.parse( text, sink );
203 
204         Iterator<SinkEventElement> it = sink.getEventList().iterator();
205 
206         assertEquals( "paragraph", it.next().getName() );
207         assertEquals( "inline", it.next().getName() );
208         assertEquals( "text", it.next().getName() );
209         assertEquals( "inline_", it.next().getName() );
210 
211         SinkEventElement el = it.next();
212         assertEquals( "text", el.getName() );
213         assertEquals( " ",  (String) el.getArgs()[0] );
214 
215         assertEquals( "inline", it.next().getName() );
216         assertEquals( "text", it.next().getName() );
217         assertEquals( "inline_", it.next().getName() );
218         assertEquals( "paragraph_", it.next().getName() );
219         assertFalse( it.hasNext() );
220 
221 
222         // same test with EOL
223         String eol = System.getProperty( "line.separator" );
224         text = "<p><b>word</b>" + eol + "<i>word</i></p>";
225 
226         sink.reset();
227         parser.parse( text, sink );
228         it = sink.getEventList().iterator();
229 
230         assertEquals( "paragraph", it.next().getName() );
231         assertEquals( "inline", it.next().getName() );
232         assertEquals( "text", it.next().getName() );
233         assertEquals( "inline_", it.next().getName() );
234 
235         el = it.next();
236         assertEquals( "text", el.getName() );
237         // according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n"
238         assertEquals( "\n",  (String) el.getArgs()[0] );
239 
240         assertEquals( "inline", it.next().getName() );
241         assertEquals( "text", it.next().getName() );
242         assertEquals( "inline_", it.next().getName() );
243         assertEquals( "paragraph_", it.next().getName() );
244         assertFalse( it.hasNext() );
245 
246 
247         // DOXIA-189: there should be no EOL after closing tag
248         text = "<p>There should be no space after the last <i>word</i>.</p>";
249 
250         sink.reset();
251         parser.parse( text, sink );
252         it = sink.getEventList().iterator();
253 
254         assertEquals( "paragraph", it.next().getName() );
255         assertEquals( "text", it.next().getName() );
256         assertEquals( "inline", it.next().getName() );
257         assertEquals( "text", it.next().getName() );
258         assertEquals( "inline_", it.next().getName() );
259 
260         el = it.next();
261         assertEquals( "text", el.getName() );
262         assertEquals( ".",  (String) el.getArgs()[0] );
263 
264         assertEquals( "paragraph_", it.next().getName() );
265         assertFalse( it.hasNext() );
266     }
267 
268     /** @throws Exception  */
269     public void testPreFormattedText()
270         throws Exception
271     {
272         String text = "<pre><a href=\"what.html\">what</a></pre>";
273 
274         parser.parse( text, sink );
275 
276         Iterator<SinkEventElement> it = sink.getEventList().iterator();
277         assertEquals( "verbatim", it.next().getName() );
278         assertEquals( "link", it.next().getName() );
279         assertEquals( "text", it.next().getName() );
280         assertEquals( "link_", it.next().getName() );
281         assertEquals( "verbatim_", it.next().getName() );
282         assertFalse( it.hasNext() );
283 
284         text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
285         sink.reset();
286         parser.parse( text, sink );
287 
288         it = sink.getEventList().iterator();
289         assertEquals( "verbatim", it.next().getName() );
290         assertEquals( "text", it.next().getName() );
291         assertEquals( "verbatim_", it.next().getName() );
292         assertFalse( it.hasNext() );
293 
294         text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
295         sink.reset();
296         parser.parse( text, sink );
297 
298         it = sink.getEventList().iterator();
299         assertEquals( "verbatim", it.next().getName() );
300         assertEquals( "text", it.next().getName() );
301         assertEquals( "verbatim_", it.next().getName() );
302         assertFalse( it.hasNext() );
303     }
304 
305     /** @throws Exception  */
306     public void testPreEOL()
307         throws Exception
308     {
309         // test EOLs within <pre>: the sink MUST receive a text event for the EOL
310         String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
311                 + "<a href=\"what.html\">what</a></pre>";
312 
313         parser.parse( text, sink );
314 
315         Iterator<SinkEventElement> it = sink.getEventList().iterator();
316 
317         assertEquals( "verbatim", it.next().getName() );
318         assertEquals( "link", it.next().getName() );
319         assertEquals( "text", it.next().getName() );
320         assertEquals( "link_", it.next().getName() );
321         assertEquals( "text", it.next().getName() );
322         assertEquals( "link", it.next().getName() );
323         assertEquals( "text", it.next().getName() );
324         assertEquals( "link_", it.next().getName() );
325         assertEquals( "verbatim_", it.next().getName() );
326     }
327 
328     /** @throws Exception  */
329     public void testDoxia250()
330         throws Exception
331     {
332         StringBuilder sb = new StringBuilder();
333         sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
334         sb.append( "<!ENTITY foo \"&#x159;\">" ).append( XhtmlBaseParser.EOL );
335         sb.append( "<!ENTITY foo1 \"&nbsp;\">" ).append( XhtmlBaseParser.EOL );
336         sb.append( "<!ENTITY foo2 \"&#x161;\">" ).append( XhtmlBaseParser.EOL );
337         sb.append( "<!ENTITY tritPos \"&#x1d7ed;\">" ).append( XhtmlBaseParser.EOL );
338         sb.append( "]>" ).append( XhtmlBaseParser.EOL );
339         sb.append( "<p>&foo;&foo1;&foo2;&tritPos;</p>" );
340 
341         parser.setValidate( false );
342         parser.parse( sb.toString(), sink );
343 
344         Iterator<SinkEventElement> it = sink.getEventList().iterator();
345 
346         SinkEventElement event = it.next();
347         assertEquals( "paragraph", event.getName() );
348 
349         event = it.next();
350         assertEquals( "text", event.getName() );
351         assertEquals( "\u0159",  (String) event.getArgs()[0] );
352 
353         event = it.next();
354         assertEquals( "text", event.getName() );
355         assertEquals( "\u00A0",  (String) event.getArgs()[0] );
356 
357         event = it.next();
358         assertEquals( "text", event.getName() );
359         assertEquals( "\u0161",  (String) event.getArgs()[0] );
360 
361         event = it.next();
362         assertEquals( "text", event.getName() );
363         assertEquals( "\uD835\uDFED",  (String) event.getArgs()[0] );
364 
365         event = it.next();
366         assertEquals( "paragraph_", event.getName() );
367     }
368 
369     /** @throws Exception  */
370     public void testEntities()
371         throws Exception
372     {
373         final String text = "<!DOCTYPE test [<!ENTITY flo \"&#x159;\"><!ENTITY tritPos \"&#x1d7ed;\"><!ENTITY fo \"&#65;\"><!ENTITY myCustom \"&fo;\">]>"
374                 + "<body><h2>&amp;&flo;&#x159;&tritPos;&#x1d7ed;</h2><p>&amp;&flo;&#x159;&tritPos;&#x1d7ed;&myCustom;</p></body>";
375 
376         parser.setValidate( false );
377         parser.parse( text, sink );
378 
379         Iterator<SinkEventElement> it = sink.getEventList().iterator();
380 
381         assertEquals( "section1", it.next().getName() );
382         assertEquals( "sectionTitle1", it.next().getName() );
383 
384         SinkEventElement textEvt = it.next();
385         assertEquals( "text", textEvt.getName() );
386         assertEquals( "&", textEvt.getArgs()[0] );
387 
388         textEvt = it.next();
389         assertEquals( "text", textEvt.getName() );
390         assertEquals( "\u0159", textEvt.getArgs()[0] );
391 
392         textEvt = it.next();
393         assertEquals( "text", textEvt.getName() );
394         assertEquals( "\u0159", textEvt.getArgs()[0] );
395 
396         textEvt = it.next();
397         assertEquals( "text", textEvt.getName() );
398         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
399 
400         textEvt = it.next();
401         assertEquals( "text", textEvt.getName() );
402         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
403 
404         assertEquals( "sectionTitle1_", it.next().getName() );
405         assertEquals( "paragraph", it.next().getName() );
406 
407         textEvt = it.next();
408         assertEquals( "text", textEvt.getName() );
409         assertEquals( "&", textEvt.getArgs()[0] );
410 
411         textEvt = it.next();
412         assertEquals( "text", textEvt.getName() );
413         assertEquals( "\u0159", textEvt.getArgs()[0] );
414 
415         textEvt = it.next();
416         assertEquals( "text", textEvt.getName() );
417         assertEquals( "\u0159", textEvt.getArgs()[0] );
418 
419         textEvt = it.next();
420         assertEquals( "text", textEvt.getName() );
421         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
422 
423         textEvt = it.next();
424         assertEquals( "text", textEvt.getName() );
425         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
426 
427         textEvt = it.next();
428         assertEquals( "text", textEvt.getName() );
429         assertEquals( "A", textEvt.getArgs()[0] );
430 
431         assertEquals( "paragraph_", it.next().getName() );
432 // FIXME
433 //        assertEquals( "section1_", it.next().getName() );
434 
435         assertFalse( it.hasNext() );
436     }
437 
438     /** @throws Exception  */
439     public void testXhtmlEntities()
440         throws Exception
441     {
442         final String text = "<body><h2>&quot;&amp;</h2><p>&apos;&lt;&gt;</p></body>";
443 
444         parser.parse( text, sink );
445 
446         Iterator<SinkEventElement> it = sink.getEventList().iterator();
447 
448         assertEquals( "section1", it.next().getName() );
449         assertEquals( "sectionTitle1", it.next().getName() );
450 
451         SinkEventElement textEvt = it.next();
452         assertEquals( "text", textEvt.getName() );
453         assertEquals( "\"", textEvt.getArgs()[0] );
454 
455         textEvt = it.next();
456         assertEquals( "text", textEvt.getName() );
457         assertEquals( "&", textEvt.getArgs()[0] );
458 
459         assertEquals( "sectionTitle1_", it.next().getName() );
460         assertEquals( "paragraph", it.next().getName() );
461 
462         textEvt = it.next();
463         assertEquals( "text", textEvt.getName() );
464         assertEquals( "\'", textEvt.getArgs()[0] );
465 
466         textEvt = it.next();
467         assertEquals( "text", textEvt.getName() );
468         assertEquals( "<", textEvt.getArgs()[0] );
469 
470         textEvt = it.next();
471         assertEquals( "text", textEvt.getName() );
472         assertEquals( ">", textEvt.getArgs()[0] );
473 
474         assertEquals( "paragraph_", it.next().getName() );
475 
476         assertFalse( it.hasNext() );
477     }
478 
479     /** @throws Exception  */
480     public void testLists()
481         throws Exception
482     {
483         String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
484         parser.parse( text, sink );
485         Iterator<SinkEventElement> it = sink.getEventList().iterator();
486 
487         assertEquals( "division", it.next().getName() );
488         assertEquals( "list", it.next().getName() );
489         assertEquals( "listItem", it.next().getName() );
490         assertEquals( "listItem_", it.next().getName() );
491         assertEquals( "list_", it.next().getName() );
492 
493         assertEquals( "numberedList", it.next().getName() );
494         assertEquals( "numberedListItem", it.next().getName() );
495         assertEquals( "numberedListItem_", it.next().getName() );
496         assertEquals( "numberedList_", it.next().getName() );
497 
498         assertEquals( "definitionList", it.next().getName() );
499         assertEquals( "definitionListItem", it.next().getName() );
500         assertEquals( "definedTerm", it.next().getName() );
501         assertEquals( "definedTerm_", it.next().getName() );
502         assertEquals( "definition", it.next().getName() );
503         assertEquals( "definition_", it.next().getName() );
504         assertEquals( "definitionListItem_", it.next().getName() );
505         assertEquals( "definitionList_", it.next().getName() );
506         assertEquals( "division_", it.next().getName() );
507     }
508 
509     /** @throws Exception  */
510     public void testSimpleTags()
511         throws Exception
512     {
513         String text = "<div><br /><wbr /><hr /><img src=\"img.src\"/></div>";
514         parser.parse( text, sink );
515         Iterator<SinkEventElement> it = sink.getEventList().iterator();
516 
517         assertEquals( "division", it.next().getName() );
518         assertEquals( "lineBreak", it.next().getName() );
519         assertEquals( "lineBreakOpportunity", it.next().getName() );
520         assertEquals( "horizontalRule", it.next().getName() );
521         assertEquals( "figureGraphics", it.next().getName() );
522         assertEquals( "division_", it.next().getName() );
523     }
524 
525     /** @throws Exception  */
526     public void testSemanticTags()
527         throws Exception
528     {
529         String text = "<em><strong><small><s><cite><q><dfn><abbr><i><b><code><var><samp><kbd><sup><sub><u><mark><ruby><rb><rt><rtc><rp><bdi><bdo><span><ins><del>a text &amp; &#xc6;</del></ins></span></bdo></bdi></rp></rtc></rt></rb></ruby></mark></u></sub></sup></kbd></samp></var></code></b></i></abbr></dfn></q></cite></s></small></strong></em>";
530         parser.parse( text, sink );
531         Iterator<SinkEventElement> it = sink.getEventList().iterator();
532 
533         SinkEventElement event = it.next();
534         assertEquals( "inline", event.getName() );
535         assertEquals( "semantics=emphasis",  event.getArgs()[0].toString().trim() );
536 
537         event = it.next();
538         assertEquals( "inline", event.getName() );
539         assertEquals( "semantics=strong",  event.getArgs()[0].toString().trim() );
540 
541         event = it.next();
542         assertEquals( "inline", event.getName() );
543         assertEquals( "semantics=small",  event.getArgs()[0].toString().trim() );
544 
545         event = it.next();
546         assertEquals( "inline", event.getName() );
547         assertEquals( "semantics=line-through",  event.getArgs()[0].toString().trim() );
548 
549         event = it.next();
550         assertEquals( "inline", event.getName() );
551         assertEquals( "semantics=citation",  event.getArgs()[0].toString().trim() );
552 
553         event = it.next();
554         assertEquals( "inline", event.getName() );
555         assertEquals( "semantics=quote",  event.getArgs()[0].toString().trim() );
556 
557         event = it.next();
558         assertEquals( "inline", event.getName() );
559         assertEquals( "semantics=definition",  event.getArgs()[0].toString().trim() );
560 
561         event = it.next();
562         assertEquals( "inline", event.getName() );
563         assertEquals( "semantics=abbreviation",  event.getArgs()[0].toString().trim() );
564 
565         event = it.next();
566         assertEquals( "inline", event.getName() );
567         assertEquals( "semantics=italic",  event.getArgs()[0].toString().trim() );
568 
569         event = it.next();
570         assertEquals( "inline", event.getName() );
571         assertEquals( "semantics=bold",  event.getArgs()[0].toString().trim() );
572 
573         event = it.next();
574         assertEquals( "inline", event.getName() );
575         assertEquals( "semantics=code",  event.getArgs()[0].toString().trim() );
576 
577         event = it.next();
578         assertEquals( "inline", event.getName() );
579         assertEquals( "semantics=variable",  event.getArgs()[0].toString().trim() );
580 
581         event = it.next();
582         assertEquals( "inline", event.getName() );
583         assertEquals( "semantics=sample",  event.getArgs()[0].toString().trim() );
584 
585         event = it.next();
586         assertEquals( "inline", event.getName() );
587         assertEquals( "semantics=keyboard",  event.getArgs()[0].toString().trim() );
588 
589         event = it.next();
590         assertEquals( "inline", event.getName() );
591         assertEquals( "semantics=superscript",  event.getArgs()[0].toString().trim() );
592 
593         event = it.next();
594         assertEquals( "inline", event.getName() );
595         assertEquals( "semantics=subscript",  event.getArgs()[0].toString().trim() );
596 
597         event = it.next();
598         assertEquals( "inline", event.getName() );
599         assertEquals( "semantics=annotation",  event.getArgs()[0].toString().trim() );
600 
601         event = it.next();
602         assertEquals( "inline", event.getName() );
603         assertEquals( "semantics=highlight",  event.getArgs()[0].toString().trim() );
604 
605         event = it.next();
606         assertEquals( "inline", event.getName() );
607         assertEquals( "semantics=ruby",  event.getArgs()[0].toString().trim() );
608 
609         event = it.next();
610         assertEquals( "inline", event.getName() );
611         assertEquals( "semantics=rubyBase",  event.getArgs()[0].toString().trim() );
612 
613         event = it.next();
614         assertEquals( "inline", event.getName() );
615         assertEquals( "semantics=rubyText",  event.getArgs()[0].toString().trim() );
616 
617         event = it.next();
618         assertEquals( "inline", event.getName() );
619         assertEquals( "semantics=rubyTextContainer",  event.getArgs()[0].toString().trim() );
620 
621         event = it.next();
622         assertEquals( "inline", event.getName() );
623         assertEquals( "semantics=rubyParentheses",  event.getArgs()[0].toString().trim() );
624 
625         event = it.next();
626         assertEquals( "inline", event.getName() );
627         assertEquals( "semantics=bidirectionalIsolation",  event.getArgs()[0].toString().trim() );
628 
629         event = it.next();
630         assertEquals( "inline", event.getName() );
631         assertEquals( "semantics=bidirectionalOverride",  event.getArgs()[0].toString().trim() );
632 
633         event = it.next();
634         assertEquals( "inline", event.getName() );
635         assertEquals( "semantics=phrase",  event.getArgs()[0].toString().trim() );
636 
637         event = it.next();
638         assertEquals( "inline", event.getName() );
639         assertEquals( "semantics=insert",  event.getArgs()[0].toString().trim() );
640 
641         event = it.next();
642         assertEquals( "inline", event.getName() );
643         assertEquals( "semantics=delete",  event.getArgs()[0].toString().trim() );
644 
645         assertEquals( "text", it.next().getName() );
646         assertEquals( "text", it.next().getName() );
647         assertEquals( "text", it.next().getName() );
648         assertEquals( "text", it.next().getName() );
649 
650         assertEquals( "inline_", it.next().getName() );
651         assertEquals( "inline_", it.next().getName() );
652         assertEquals( "inline_", it.next().getName() );
653         assertEquals( "inline_", it.next().getName() );
654         assertEquals( "inline_", it.next().getName() );
655         assertEquals( "inline_", it.next().getName() );
656         assertEquals( "inline_", it.next().getName() );
657         assertEquals( "inline_", it.next().getName() );
658         assertEquals( "inline_", it.next().getName() );
659         assertEquals( "inline_", it.next().getName() );
660         assertEquals( "inline_", it.next().getName() );
661         assertEquals( "inline_", it.next().getName() );
662         assertEquals( "inline_", it.next().getName() );
663         assertEquals( "inline_", it.next().getName() );
664         assertEquals( "inline_", it.next().getName() );
665         assertEquals( "inline_", it.next().getName() );
666         assertEquals( "inline_", it.next().getName() );
667         assertEquals( "inline_", it.next().getName() );
668         assertEquals( "inline_", it.next().getName() );
669         assertEquals( "inline_", it.next().getName() );
670         assertEquals( "inline_", it.next().getName() );
671         assertEquals( "inline_", it.next().getName() );
672         assertEquals( "inline_", it.next().getName() );
673         assertEquals( "inline_", it.next().getName() );
674         assertEquals( "inline_", it.next().getName() );
675         assertEquals( "inline_", it.next().getName() );
676         assertEquals( "inline_", it.next().getName() );
677         assertEquals( "inline_", it.next().getName() );
678 
679     }
680 
681     /** @throws Exception  */
682     public void testSpecial()
683         throws Exception
684     {
685         String text = "<p><!-- a pagebreak: --><!-- PB -->&nbsp;&#160;<unknown /></p>";
686         parser.parse( text, sink );
687         Iterator<SinkEventElement> it = sink.getEventList().iterator();
688 
689         assertEquals( "paragraph", it.next().getName() );
690         assertEquals( "comment", it.next().getName() );
691         assertEquals( "pageBreak", it.next().getName() );
692         assertEquals( "nonBreakingSpace", it.next().getName() );
693         assertEquals( "nonBreakingSpace", it.next().getName() );
694         // unknown events are not reported by the base parser
695         assertEquals( "paragraph_", it.next().getName() );
696     }
697 
698     /** @throws Exception  */
699     public void testTable()
700         throws Exception
701     {
702         String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
703         parser.parse( text, sink );
704         Iterator<SinkEventElement> it = sink.getEventList().iterator();
705 
706         assertEquals( "table", it.next().getName() );
707 
708         // DOXIA-374
709         SinkEventElement el = it.next();
710         assertEquals( "tableRows", el.getName() );
711         assertFalse( (Boolean) el.getArgs()[1] );
712 
713         assertEquals( "tableCaption", it.next().getName() );
714         assertEquals( "tableCaption_", it.next().getName() );
715         assertEquals( "tableRow", it.next().getName() );
716         assertEquals( "tableHeaderCell", it.next().getName() );
717         assertEquals( "tableHeaderCell_", it.next().getName() );
718         assertEquals( "tableRow_", it.next().getName() );
719         assertEquals( "tableRow", it.next().getName() );
720         assertEquals( "tableCell", it.next().getName() );
721         assertEquals( "tableCell_", it.next().getName() );
722         assertEquals( "tableRow_", it.next().getName() );
723         assertEquals( "tableRows_", it.next().getName() );
724         assertEquals( "table_", it.next().getName() );
725     }
726 
727     /** @throws Exception  */
728     public void testFigure()
729         throws Exception
730     {
731         String text = "<figure><img src=\"src.jpg\"/><figcaption></figcaption></figure>";
732         parser.parse( text, sink );
733         Iterator<SinkEventElement> it = sink.getEventList().iterator();
734 
735         assertEquals( "figure", it.next().getName() );
736         assertEquals( "figureGraphics", it.next().getName() );
737         assertEquals( "figureCaption", it.next().getName() );
738         assertEquals( "figureCaption_", it.next().getName() );
739         assertEquals( "figure_", it.next().getName() );
740     }
741 
742     /** @throws Exception  */
743     public void testAnchorLink()
744         throws Exception
745     {
746         String text = "<div><a href=\"\"></a>" +
747                 "<a href=\"valid\"></a>" +
748                 "<a href=\"#1invalid\"></a>" +
749                 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
750                 "<a name=\"valid\"></a>" +
751                 "<a name=\"1invalid\"></a>" +
752                 "<a id=\"1invalid\"></a></div>";
753 
754         parser.parse( text, sink );
755         Iterator<SinkEventElement> it = sink.getEventList().iterator();
756 
757         SinkEventElement element = it.next();
758         assertEquals( "division", element.getName() );
759 
760         element = it.next();
761         assertEquals( "link", element.getName() );
762         assertEquals( "", element.getArgs()[0] );
763         assertEquals( "link_", it.next().getName() );
764 
765         element = it.next();
766         assertEquals( "link", element.getName() );
767         assertEquals( "valid", element.getArgs()[0] );
768         assertEquals( "link_", it.next().getName() );
769 
770         element = it.next();
771         assertEquals( "link", element.getName() );
772         assertEquals( "#a1invalid", element.getArgs()[0] );
773         assertEquals( "link_", it.next().getName() );
774 
775         element = it.next();
776         assertEquals( "link", element.getName() );
777         assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
778         assertEquals( "link_", it.next().getName() );
779 
780         element = it.next();
781         assertEquals( "anchor", element.getName() );
782         assertEquals( "valid", element.getArgs()[0] );
783         assertEquals( "anchor_", it.next().getName() );
784 
785         element = it.next();
786         assertEquals( "anchor", element.getName() );
787         assertEquals( "a1invalid", element.getArgs()[0] );
788         assertEquals( "anchor_", it.next().getName() );
789 
790         element = it.next();
791         assertEquals( "anchor", element.getName() );
792         assertEquals( "a1invalid", element.getArgs()[0] );
793         assertEquals( "anchor_", it.next().getName() );
794 
795         element = it.next();
796         assertEquals( "division_", element.getName() );
797     }
798 
799     /**
800      * Test entities in attributes.
801      *
802      * @throws java.lang.Exception if any.
803      */
804     public void testAttributeEntities()
805         throws Exception
806     {
807         String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&amp;l=e\"></script>";
808 
809         parser.parse( text, sink );
810 
811         Iterator<SinkEventElement> it = sink.getEventList().iterator();
812 
813         SinkEventElement event = it.next();
814 
815         assertEquals( "unknown", event.getName() );
816         assertEquals( "script", event.getArgs()[0] );
817         SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
818         // ampersand should be un-escaped
819         assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
820         assertEquals( "unknown", it.next().getName() );
821         assertFalse( it.hasNext() );
822 
823         sink.reset();
824         text = "<img src=\"http://ex.com/ex.jpg?v=l&amp;l=e\" alt=\"image\"/>";
825         parser.parse( text, sink );
826 
827         it = sink.getEventList().iterator();
828         event = it.next();
829         assertEquals( "figureGraphics", event.getName() );
830         attribs = (SinkEventAttributeSet) event.getArgs()[1];
831         // ampersand should be un-escaped
832         assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
833     }
834 
835     public void testUnbalancedDefinitionListItem() throws Exception
836     {
837         String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
838                         "<dl><dd>value</dd></dl>" +
839                         "<dl><dt>key</dt></dl>" +
840                         "<dl></dl>" +
841                         "<dl><dd>value</dd><dt>key</dt></dl></body>";
842 
843         parser.parse( text, sink );
844 
845         Iterator<SinkEventElement> it = sink.getEventList().iterator();
846         assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
847                           "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
848         assertStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
849                           "definitionListItem_", "definitionList_" );
850         assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
851                           "definitionListItem_", "definitionList_" );
852         assertStartsWith( it, "definitionList", "definitionList_" );
853         assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
854                           "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
855                           "definitionListItem_", "definitionList_" );
856     }
857 }