1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.util.Iterator;
23
24 import org.apache.maven.doxia.logging.Log;
25 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
26 import org.apache.maven.doxia.sink.impl.SinkEventElement;
27 import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
28
29 import static org.junit.Assert.assertNotEquals;
30
31
32
33
34 public class Xhtml5BaseParserTest
35 extends AbstractParserTest
36 {
37 private Xhtml5BaseParser parser;
38 private final SinkEventTestingSink sink = new SinkEventTestingSink();
39
40
41 @Override
42 protected Parser createParser()
43 {
44 parser = new Xhtml5BaseParser();
45 parser.getLog().setLogLevel( Log.LEVEL_ERROR );
46 return parser;
47 }
48
49 @Override
50 protected String outputExtension()
51 {
52 return "xhtml";
53 }
54
55 @Override
56 protected void setUp() throws Exception
57 {
58 super.setUp();
59
60 parser = new Xhtml5BaseParser();
61 parser.getLog().setLogLevel( Log.LEVEL_ERROR );
62 sink.reset();
63 }
64
65
66 public void testDoxiaVersion()
67 {
68 assertNotNull( XhtmlBaseParser.doxiaVersion() );
69 assertNotEquals( "unknown", XhtmlBaseParser.doxiaVersion() );
70 }
71
72 public void testHeadingEventsList()
73 throws Exception
74 {
75 String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
76
77 parser.parse( text, sink );
78
79 Iterator<SinkEventElement> it = sink.getEventList().iterator();
80
81 assertEquals( "paragraph", it.next().getName() );
82 assertEquals( "section1", it.next().getName() );
83 assertEquals( "sectionTitle1", it.next().getName() );
84 assertEquals( "sectionTitle1_", it.next().getName() );
85 assertEquals( "section2", it.next().getName() );
86 assertEquals( "sectionTitle2", it.next().getName() );
87 assertEquals( "sectionTitle2_", it.next().getName() );
88 assertEquals( "section3", it.next().getName() );
89 assertEquals( "sectionTitle3", it.next().getName() );
90 assertEquals( "sectionTitle3_", it.next().getName() );
91 assertEquals( "section4", it.next().getName() );
92 assertEquals( "sectionTitle4", it.next().getName() );
93 assertEquals( "sectionTitle4_", it.next().getName() );
94 assertEquals( "section5", it.next().getName() );
95 assertEquals( "sectionTitle5", it.next().getName() );
96 assertEquals( "sectionTitle5_", it.next().getName() );
97 assertEquals( "section5_", it.next().getName() );
98 assertEquals( "section4_", it.next().getName() );
99 assertEquals( "section3_", it.next().getName() );
100 assertEquals( "section2_", it.next().getName() );
101 assertEquals( "section1_", it.next().getName() );
102 assertEquals( "section1", it.next().getName() );
103 assertEquals( "sectionTitle1", it.next().getName() );
104 assertEquals( "sectionTitle1_", it.next().getName() );
105
106
107
108 assertEquals( "paragraph_", it.next().getName() );
109 assertFalse( it.hasNext() );
110 }
111
112 public void testNestedHeadingEventsList()
113 throws Exception
114 {
115
116 String text = "<p><h2></h2><h6></h6><h3></h3></p>";
117
118 parser.parse( text, sink );
119
120 Iterator<SinkEventElement> it = sink.getEventList().iterator();
121
122 assertEquals( "paragraph", it.next().getName() );
123 assertEquals( "section1", it.next().getName() );
124 assertEquals( "sectionTitle1", it.next().getName() );
125 assertEquals( "sectionTitle1_", it.next().getName() );
126
127 assertEquals( "section2", it.next().getName() );
128 assertEquals( "section3", it.next().getName() );
129 assertEquals( "section4", it.next().getName() );
130
131 assertEquals( "section5", it.next().getName() );
132 assertEquals( "sectionTitle5", it.next().getName() );
133 assertEquals( "sectionTitle5_", it.next().getName() );
134 assertEquals( "section5_", it.next().getName() );
135
136 assertEquals( "section4_", it.next().getName() );
137 assertEquals( "section3_", it.next().getName() );
138 assertEquals( "section2_", it.next().getName() );
139
140 assertEquals( "section2", it.next().getName() );
141 assertEquals( "sectionTitle2", it.next().getName() );
142 assertEquals( "sectionTitle2_", it.next().getName() );
143
144
145
146
147 assertEquals( "paragraph_", it.next().getName() );
148 assertFalse( it.hasNext() );
149 }
150
151 public void testFigureEventsList()
152 throws Exception
153 {
154 String text = "<img src=\"source\" title=\"caption\" />";
155
156 parser.parse( text, sink );
157
158 Iterator<SinkEventElement> it = sink.getEventList().iterator();
159
160 assertEquals( "figureGraphics", it.next().getName() );
161 assertFalse( it.hasNext() );
162 }
163
164 public void testTableEventsList()
165 throws Exception
166 {
167
168
169 String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
170
171 parser.parse( text, sink );
172
173 Iterator<SinkEventElement> it = sink.getEventList().iterator();
174
175 assertEquals( "table", it.next().getName() );
176 assertEquals( "tableRows", it.next().getName() );
177 assertEquals( "tableRow", it.next().getName() );
178 assertEquals( "tableHeaderCell", it.next().getName() );
179 assertEquals( "text", it.next().getName() );
180 assertEquals( "tableHeaderCell_", it.next().getName() );
181 assertEquals( "tableRow_", it.next().getName() );
182 assertEquals( "tableRow", it.next().getName() );
183 assertEquals( "tableCell", it.next().getName() );
184 assertEquals( "text", it.next().getName() );
185 assertEquals( "tableCell_", it.next().getName() );
186 assertEquals( "tableRow_", it.next().getName() );
187 assertEquals( "tableRows_", it.next().getName() );
188 assertEquals( "table_", it.next().getName() );
189
190 assertFalse( it.hasNext() );
191 }
192
193 public void testSignificantWhiteSpace()
194 throws Exception
195 {
196
197 String text = "<p><b>word</b> <i>word</i></p>";
198
199 parser.parse( text, sink );
200
201 Iterator<SinkEventElement> it = sink.getEventList().iterator();
202
203 assertEquals( "paragraph", it.next().getName() );
204 assertEquals( "inline", it.next().getName() );
205 assertEquals( "text", it.next().getName() );
206 assertEquals( "inline_", it.next().getName() );
207
208 SinkEventElement el = it.next();
209 assertEquals( "text", el.getName() );
210 assertEquals( " ", (String) el.getArgs()[0] );
211
212 assertEquals( "inline", it.next().getName() );
213 assertEquals( "text", it.next().getName() );
214 assertEquals( "inline_", it.next().getName() );
215 assertEquals( "paragraph_", it.next().getName() );
216 assertFalse( it.hasNext() );
217
218
219
220 String eol = System.getProperty( "line.separator" );
221 text = "<p><b>word</b>" + eol + "<i>word</i></p>";
222
223 sink.reset();
224 parser.parse( text, sink );
225 it = sink.getEventList().iterator();
226
227 assertEquals( "paragraph", it.next().getName() );
228 assertEquals( "inline", it.next().getName() );
229 assertEquals( "text", it.next().getName() );
230 assertEquals( "inline_", it.next().getName() );
231
232 el = it.next();
233 assertEquals( "text", el.getName() );
234
235 assertEquals( "\n", (String) el.getArgs()[0] );
236
237 assertEquals( "inline", it.next().getName() );
238 assertEquals( "text", it.next().getName() );
239 assertEquals( "inline_", it.next().getName() );
240 assertEquals( "paragraph_", it.next().getName() );
241 assertFalse( it.hasNext() );
242
243
244
245 text = "<p>There should be no space after the last <i>word</i>.</p>";
246
247 sink.reset();
248 parser.parse( text, sink );
249 it = sink.getEventList().iterator();
250
251 assertEquals( "paragraph", it.next().getName() );
252 assertEquals( "text", it.next().getName() );
253 assertEquals( "inline", it.next().getName() );
254 assertEquals( "text", it.next().getName() );
255 assertEquals( "inline_", it.next().getName() );
256
257 el = it.next();
258 assertEquals( "text", el.getName() );
259 assertEquals( ".", (String) el.getArgs()[0] );
260
261 assertEquals( "paragraph_", it.next().getName() );
262 assertFalse( it.hasNext() );
263 }
264
265 public void testPreFormattedText()
266 throws Exception
267 {
268 String text = "<pre><a href=\"what.html\">what</a></pre>";
269
270 parser.parse( text, sink );
271
272 Iterator<SinkEventElement> it = sink.getEventList().iterator();
273 assertEquals( "verbatim", it.next().getName() );
274 assertEquals( "link", it.next().getName() );
275 assertEquals( "text", it.next().getName() );
276 assertEquals( "link_", it.next().getName() );
277 assertEquals( "verbatim_", it.next().getName() );
278 assertFalse( it.hasNext() );
279
280 text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
281 sink.reset();
282 parser.parse( text, sink );
283
284 it = sink.getEventList().iterator();
285 assertEquals( "verbatim", it.next().getName() );
286 assertEquals( "text", it.next().getName() );
287 assertEquals( "verbatim_", it.next().getName() );
288 assertFalse( it.hasNext() );
289
290 text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
291 sink.reset();
292 parser.parse( text, sink );
293
294 it = sink.getEventList().iterator();
295 assertEquals( "verbatim", it.next().getName() );
296 assertEquals( "text", it.next().getName() );
297 assertEquals( "verbatim_", it.next().getName() );
298 assertFalse( it.hasNext() );
299 }
300
301 public void testPreEOL()
302 throws Exception
303 {
304
305 String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
306 + "<a href=\"what.html\">what</a></pre>";
307
308 parser.parse( text, sink );
309
310 Iterator<SinkEventElement> it = sink.getEventList().iterator();
311
312 assertEquals( "verbatim", it.next().getName() );
313 assertEquals( "link", it.next().getName() );
314 assertEquals( "text", it.next().getName() );
315 assertEquals( "link_", it.next().getName() );
316 assertEquals( "text", it.next().getName() );
317 assertEquals( "link", it.next().getName() );
318 assertEquals( "text", it.next().getName() );
319 assertEquals( "link_", it.next().getName() );
320 assertEquals( "verbatim_", it.next().getName() );
321 }
322
323 public void testDoxia250()
324 throws Exception
325 {
326 StringBuilder sb = new StringBuilder();
327 sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
328 sb.append( "<!ENTITY foo \"ř\">" ).append( XhtmlBaseParser.EOL );
329 sb.append( "<!ENTITY foo1 \" \">" ).append( XhtmlBaseParser.EOL );
330 sb.append( "<!ENTITY foo2 \"š\">" ).append( XhtmlBaseParser.EOL );
331 sb.append( "<!ENTITY tritPos \"𝟭\">" ).append( XhtmlBaseParser.EOL );
332 sb.append( "]>" ).append( XhtmlBaseParser.EOL );
333 sb.append( "<p>&foo;&foo1;&foo2;&tritPos;</p>" );
334
335 parser.setValidate( false );
336 parser.parse( sb.toString(), sink );
337
338 Iterator<SinkEventElement> it = sink.getEventList().iterator();
339
340 SinkEventElement event = it.next();
341 assertEquals( "paragraph", event.getName() );
342
343 event = it.next();
344 assertEquals( "text", event.getName() );
345 assertEquals( "\u0159", (String) event.getArgs()[0] );
346
347 event = it.next();
348 assertEquals( "text", event.getName() );
349 assertEquals( "\u00A0", (String) event.getArgs()[0] );
350
351 event = it.next();
352 assertEquals( "text", event.getName() );
353 assertEquals( "\u0161", (String) event.getArgs()[0] );
354
355 event = it.next();
356 assertEquals( "text", event.getName() );
357 assertEquals( "\uD835\uDFED", (String) event.getArgs()[0] );
358
359 event = it.next();
360 assertEquals( "paragraph_", event.getName() );
361 }
362
363 public void testEntities()
364 throws Exception
365 {
366 final String text = "<!DOCTYPE test [<!ENTITY flo \"ř\"><!ENTITY tritPos \"𝟭\"><!ENTITY fo \"A\"><!ENTITY myCustom \"&fo;\">]>"
367 + "<body><h2>&&flo;ř&tritPos;𝟭</h2><p>&&flo;ř&tritPos;𝟭&myCustom;</p></body>";
368
369 parser.setValidate( false );
370 parser.parse( text, sink );
371
372 Iterator<SinkEventElement> it = sink.getEventList().iterator();
373
374 assertEquals( "section1", it.next().getName() );
375 assertEquals( "sectionTitle1", it.next().getName() );
376
377 SinkEventElement textEvt = it.next();
378 assertEquals( "text", textEvt.getName() );
379 assertEquals( "&", textEvt.getArgs()[0] );
380
381 textEvt = it.next();
382 assertEquals( "text", textEvt.getName() );
383 assertEquals( "\u0159", textEvt.getArgs()[0] );
384
385 textEvt = it.next();
386 assertEquals( "text", textEvt.getName() );
387 assertEquals( "\u0159", textEvt.getArgs()[0] );
388
389 textEvt = it.next();
390 assertEquals( "text", textEvt.getName() );
391 assertEquals( "\uD835\uDFED", (String) textEvt.getArgs()[0] );
392
393 textEvt = it.next();
394 assertEquals( "text", textEvt.getName() );
395 assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
396
397 assertEquals( "sectionTitle1_", it.next().getName() );
398 assertEquals( "paragraph", it.next().getName() );
399
400 textEvt = it.next();
401 assertEquals( "text", textEvt.getName() );
402 assertEquals( "&", textEvt.getArgs()[0] );
403
404 textEvt = it.next();
405 assertEquals( "text", textEvt.getName() );
406 assertEquals( "\u0159", textEvt.getArgs()[0] );
407
408 textEvt = it.next();
409 assertEquals( "text", textEvt.getName() );
410 assertEquals( "\u0159", textEvt.getArgs()[0] );
411
412 textEvt = it.next();
413 assertEquals( "text", textEvt.getName() );
414 assertEquals( "\uD835\uDFED", (String) textEvt.getArgs()[0] );
415
416 textEvt = it.next();
417 assertEquals( "text", textEvt.getName() );
418 assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
419
420 textEvt = it.next();
421 assertEquals( "text", textEvt.getName() );
422 assertEquals( "A", textEvt.getArgs()[0] );
423
424 assertEquals( "paragraph_", it.next().getName() );
425
426
427
428 assertFalse( it.hasNext() );
429 }
430
431 public void testXhtmlEntities()
432 throws Exception
433 {
434 final String text = "<body><h2>"&</h2><p>'<></p></body>";
435
436 parser.parse( text, sink );
437
438 Iterator<SinkEventElement> it = sink.getEventList().iterator();
439
440 assertEquals( "section1", it.next().getName() );
441 assertEquals( "sectionTitle1", it.next().getName() );
442
443 SinkEventElement textEvt = it.next();
444 assertEquals( "text", textEvt.getName() );
445 assertEquals( "\"", textEvt.getArgs()[0] );
446
447 textEvt = it.next();
448 assertEquals( "text", textEvt.getName() );
449 assertEquals( "&", textEvt.getArgs()[0] );
450
451 assertEquals( "sectionTitle1_", it.next().getName() );
452 assertEquals( "paragraph", it.next().getName() );
453
454 textEvt = it.next();
455 assertEquals( "text", textEvt.getName() );
456 assertEquals( "\'", textEvt.getArgs()[0] );
457
458 textEvt = it.next();
459 assertEquals( "text", textEvt.getName() );
460 assertEquals( "<", textEvt.getArgs()[0] );
461
462 textEvt = it.next();
463 assertEquals( "text", textEvt.getName() );
464 assertEquals( ">", textEvt.getArgs()[0] );
465
466 assertEquals( "paragraph_", it.next().getName() );
467
468 assertFalse( it.hasNext() );
469 }
470
471 public void testLists()
472 throws Exception
473 {
474 String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
475 parser.parse( text, sink );
476 Iterator<SinkEventElement> it = sink.getEventList().iterator();
477
478 assertEquals( "division", it.next().getName() );
479 assertEquals( "list", it.next().getName() );
480 assertEquals( "listItem", it.next().getName() );
481 assertEquals( "listItem_", it.next().getName() );
482 assertEquals( "list_", it.next().getName() );
483
484 assertEquals( "numberedList", it.next().getName() );
485 assertEquals( "numberedListItem", it.next().getName() );
486 assertEquals( "numberedListItem_", it.next().getName() );
487 assertEquals( "numberedList_", it.next().getName() );
488
489 assertEquals( "definitionList", it.next().getName() );
490 assertEquals( "definitionListItem", it.next().getName() );
491 assertEquals( "definedTerm", it.next().getName() );
492 assertEquals( "definedTerm_", it.next().getName() );
493 assertEquals( "definition", it.next().getName() );
494 assertEquals( "definition_", it.next().getName() );
495 assertEquals( "definitionListItem_", it.next().getName() );
496 assertEquals( "definitionList_", it.next().getName() );
497 assertEquals( "division_", it.next().getName() );
498 }
499
500 public void testSimpleTags()
501 throws Exception
502 {
503 String text = "<div><br /><wbr /><hr /><img src=\"img.src\"/></div>";
504 parser.parse( text, sink );
505 Iterator<SinkEventElement> it = sink.getEventList().iterator();
506
507 assertEquals( "division", it.next().getName() );
508 assertEquals( "lineBreak", it.next().getName() );
509 assertEquals( "lineBreakOpportunity", it.next().getName() );
510 assertEquals( "horizontalRule", it.next().getName() );
511 assertEquals( "figureGraphics", it.next().getName() );
512 assertEquals( "division_", it.next().getName() );
513 }
514
515 public void testSemanticTags()
516 throws Exception
517 {
518 String text = "<em><strong><small><s><cite><q><dfn><abbr><i><b><code><var><samp><kbd><sup><sub><u><mark><ruby><rb><rt><rtc><rp><bdi><bdo><span><ins><del>a text & Æ</del></ins></span></bdo></bdi></rp></rtc></rt></rb></ruby></mark></u></sub></sup></kbd></samp></var></code></b></i></abbr></dfn></q></cite></s></small></strong></em>";
519 parser.parse( text, sink );
520 Iterator<SinkEventElement> it = sink.getEventList().iterator();
521
522 SinkEventElement event = it.next();
523 assertEquals( "inline", event.getName() );
524 assertEquals( "semantics=emphasis", event.getArgs()[0].toString().trim() );
525
526 event = it.next();
527 assertEquals( "inline", event.getName() );
528 assertEquals( "semantics=strong", event.getArgs()[0].toString().trim() );
529
530 event = it.next();
531 assertEquals( "inline", event.getName() );
532 assertEquals( "semantics=small", event.getArgs()[0].toString().trim() );
533
534 event = it.next();
535 assertEquals( "inline", event.getName() );
536 assertEquals( "semantics=line-through", event.getArgs()[0].toString().trim() );
537
538 event = it.next();
539 assertEquals( "inline", event.getName() );
540 assertEquals( "semantics=citation", event.getArgs()[0].toString().trim() );
541
542 event = it.next();
543 assertEquals( "inline", event.getName() );
544 assertEquals( "semantics=quote", event.getArgs()[0].toString().trim() );
545
546 event = it.next();
547 assertEquals( "inline", event.getName() );
548 assertEquals( "semantics=definition", event.getArgs()[0].toString().trim() );
549
550 event = it.next();
551 assertEquals( "inline", event.getName() );
552 assertEquals( "semantics=abbreviation", event.getArgs()[0].toString().trim() );
553
554 event = it.next();
555 assertEquals( "inline", event.getName() );
556 assertEquals( "semantics=italic", event.getArgs()[0].toString().trim() );
557
558 event = it.next();
559 assertEquals( "inline", event.getName() );
560 assertEquals( "semantics=bold", event.getArgs()[0].toString().trim() );
561
562 event = it.next();
563 assertEquals( "inline", event.getName() );
564 assertEquals( "semantics=code", event.getArgs()[0].toString().trim() );
565
566 event = it.next();
567 assertEquals( "inline", event.getName() );
568 assertEquals( "semantics=variable", event.getArgs()[0].toString().trim() );
569
570 event = it.next();
571 assertEquals( "inline", event.getName() );
572 assertEquals( "semantics=sample", event.getArgs()[0].toString().trim() );
573
574 event = it.next();
575 assertEquals( "inline", event.getName() );
576 assertEquals( "semantics=keyboard", event.getArgs()[0].toString().trim() );
577
578 event = it.next();
579 assertEquals( "inline", event.getName() );
580 assertEquals( "semantics=superscript", event.getArgs()[0].toString().trim() );
581
582 event = it.next();
583 assertEquals( "inline", event.getName() );
584 assertEquals( "semantics=subscript", event.getArgs()[0].toString().trim() );
585
586 event = it.next();
587 assertEquals( "inline", event.getName() );
588 assertEquals( "semantics=annotation", event.getArgs()[0].toString().trim() );
589
590 event = it.next();
591 assertEquals( "inline", event.getName() );
592 assertEquals( "semantics=highlight", event.getArgs()[0].toString().trim() );
593
594 event = it.next();
595 assertEquals( "inline", event.getName() );
596 assertEquals( "semantics=ruby", event.getArgs()[0].toString().trim() );
597
598 event = it.next();
599 assertEquals( "inline", event.getName() );
600 assertEquals( "semantics=rubyBase", event.getArgs()[0].toString().trim() );
601
602 event = it.next();
603 assertEquals( "inline", event.getName() );
604 assertEquals( "semantics=rubyText", event.getArgs()[0].toString().trim() );
605
606 event = it.next();
607 assertEquals( "inline", event.getName() );
608 assertEquals( "semantics=rubyTextContainer", event.getArgs()[0].toString().trim() );
609
610 event = it.next();
611 assertEquals( "inline", event.getName() );
612 assertEquals( "semantics=rubyParentheses", event.getArgs()[0].toString().trim() );
613
614 event = it.next();
615 assertEquals( "inline", event.getName() );
616 assertEquals( "semantics=bidirectionalIsolation", event.getArgs()[0].toString().trim() );
617
618 event = it.next();
619 assertEquals( "inline", event.getName() );
620 assertEquals( "semantics=bidirectionalOverride", event.getArgs()[0].toString().trim() );
621
622 event = it.next();
623 assertEquals( "inline", event.getName() );
624 assertEquals( "semantics=phrase", event.getArgs()[0].toString().trim() );
625
626 event = it.next();
627 assertEquals( "inline", event.getName() );
628 assertEquals( "semantics=insert", event.getArgs()[0].toString().trim() );
629
630 event = it.next();
631 assertEquals( "inline", event.getName() );
632 assertEquals( "semantics=delete", event.getArgs()[0].toString().trim() );
633
634 assertEquals( "text", it.next().getName() );
635 assertEquals( "text", it.next().getName() );
636 assertEquals( "text", it.next().getName() );
637 assertEquals( "text", it.next().getName() );
638
639 assertEquals( "inline_", it.next().getName() );
640 assertEquals( "inline_", it.next().getName() );
641 assertEquals( "inline_", it.next().getName() );
642 assertEquals( "inline_", it.next().getName() );
643 assertEquals( "inline_", it.next().getName() );
644 assertEquals( "inline_", it.next().getName() );
645 assertEquals( "inline_", it.next().getName() );
646 assertEquals( "inline_", it.next().getName() );
647 assertEquals( "inline_", it.next().getName() );
648 assertEquals( "inline_", it.next().getName() );
649 assertEquals( "inline_", it.next().getName() );
650 assertEquals( "inline_", it.next().getName() );
651 assertEquals( "inline_", it.next().getName() );
652 assertEquals( "inline_", it.next().getName() );
653 assertEquals( "inline_", it.next().getName() );
654 assertEquals( "inline_", it.next().getName() );
655 assertEquals( "inline_", it.next().getName() );
656 assertEquals( "inline_", it.next().getName() );
657 assertEquals( "inline_", it.next().getName() );
658 assertEquals( "inline_", it.next().getName() );
659 assertEquals( "inline_", it.next().getName() );
660 assertEquals( "inline_", it.next().getName() );
661 assertEquals( "inline_", it.next().getName() );
662 assertEquals( "inline_", it.next().getName() );
663 assertEquals( "inline_", it.next().getName() );
664 assertEquals( "inline_", it.next().getName() );
665 assertEquals( "inline_", it.next().getName() );
666 assertEquals( "inline_", it.next().getName() );
667
668 }
669
670 public void testSpecial()
671 throws Exception
672 {
673 String text = "<p><!-- a pagebreak: --><!-- PB -->  <unknown /></p>";
674 parser.parse( text, sink );
675 Iterator<SinkEventElement> it = sink.getEventList().iterator();
676
677 assertEquals( "paragraph", it.next().getName() );
678 assertEquals( "comment", it.next().getName() );
679 assertEquals( "pageBreak", it.next().getName() );
680 assertEquals( "nonBreakingSpace", it.next().getName() );
681 assertEquals( "nonBreakingSpace", it.next().getName() );
682
683 assertEquals( "paragraph_", it.next().getName() );
684 }
685
686 public void testTable()
687 throws Exception
688 {
689 String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
690 parser.parse( text, sink );
691 Iterator<SinkEventElement> it = sink.getEventList().iterator();
692
693 assertEquals( "table", it.next().getName() );
694
695
696 SinkEventElement el = it.next();
697 assertEquals( "tableRows", el.getName() );
698 assertFalse( (Boolean) el.getArgs()[1] );
699
700 assertEquals( "tableCaption", it.next().getName() );
701 assertEquals( "tableCaption_", it.next().getName() );
702 assertEquals( "tableRow", it.next().getName() );
703 assertEquals( "tableHeaderCell", it.next().getName() );
704 assertEquals( "tableHeaderCell_", it.next().getName() );
705 assertEquals( "tableRow_", it.next().getName() );
706 assertEquals( "tableRow", it.next().getName() );
707 assertEquals( "tableCell", it.next().getName() );
708 assertEquals( "tableCell_", it.next().getName() );
709 assertEquals( "tableRow_", it.next().getName() );
710 assertEquals( "tableRows_", it.next().getName() );
711 assertEquals( "table_", it.next().getName() );
712 }
713
714 public void testFigure()
715 throws Exception
716 {
717 String text = "<figure><img src=\"src.jpg\"/><figcaption></figcaption></figure>";
718 parser.parse( text, sink );
719 Iterator<SinkEventElement> it = sink.getEventList().iterator();
720
721 assertEquals( "figure", it.next().getName() );
722 assertEquals( "figureGraphics", it.next().getName() );
723 assertEquals( "figureCaption", it.next().getName() );
724 assertEquals( "figureCaption_", it.next().getName() );
725 assertEquals( "figure_", it.next().getName() );
726 }
727
728 public void testAnchorLink()
729 throws Exception
730 {
731 String text = "<div><a href=\"\"></a>" +
732 "<a href=\"valid\"></a>" +
733 "<a href=\"#1invalid\"></a>" +
734 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
735 "<a name=\"valid\"></a>" +
736 "<a name=\"1invalid\"></a>" +
737 "<a id=\"1invalid\"></a></div>";
738
739 parser.parse( text, sink );
740 Iterator<SinkEventElement> it = sink.getEventList().iterator();
741
742 SinkEventElement element = it.next();
743 assertEquals( "division", element.getName() );
744
745 element = it.next();
746 assertEquals( "link", element.getName() );
747 assertEquals( "", element.getArgs()[0] );
748 assertEquals( "link_", it.next().getName() );
749
750 element = it.next();
751 assertEquals( "link", element.getName() );
752 assertEquals( "valid", element.getArgs()[0] );
753 assertEquals( "link_", it.next().getName() );
754
755 element = it.next();
756 assertEquals( "link", element.getName() );
757 assertEquals( "#a1invalid", element.getArgs()[0] );
758 assertEquals( "link_", it.next().getName() );
759
760 element = it.next();
761 assertEquals( "link", element.getName() );
762 assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
763 assertEquals( "link_", it.next().getName() );
764
765 element = it.next();
766 assertEquals( "anchor", element.getName() );
767 assertEquals( "valid", element.getArgs()[0] );
768 assertEquals( "anchor_", it.next().getName() );
769
770 element = it.next();
771 assertEquals( "anchor", element.getName() );
772 assertEquals( "a1invalid", element.getArgs()[0] );
773 assertEquals( "anchor_", it.next().getName() );
774
775 element = it.next();
776 assertEquals( "anchor", element.getName() );
777 assertEquals( "a1invalid", element.getArgs()[0] );
778 assertEquals( "anchor_", it.next().getName() );
779
780 element = it.next();
781 assertEquals( "division_", element.getName() );
782 }
783
784
785
786
787
788
789 public void testAttributeEntities()
790 throws Exception
791 {
792 String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&l=e\"></script>";
793
794 parser.parse( text, sink );
795
796 Iterator<SinkEventElement> it = sink.getEventList().iterator();
797
798 SinkEventElement event = it.next();
799
800 assertEquals( "unknown", event.getName() );
801 assertEquals( "script", event.getArgs()[0] );
802 SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
803
804 assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
805 assertEquals( "unknown", it.next().getName() );
806 assertFalse( it.hasNext() );
807
808 sink.reset();
809 text = "<img src=\"http://ex.com/ex.jpg?v=l&l=e\" alt=\"image\"/>";
810 parser.parse( text, sink );
811
812 it = sink.getEventList().iterator();
813 event = it.next();
814 assertEquals( "figureGraphics", event.getName() );
815 attribs = (SinkEventAttributeSet) event.getArgs()[1];
816
817 assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
818 }
819
820 public void testUnbalancedDefinitionListItem() throws Exception
821 {
822 String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
823 "<dl><dd>value</dd></dl>" +
824 "<dl><dt>key</dt></dl>" +
825 "<dl></dl>" +
826 "<dl><dd>value</dd><dt>key</dt></dl></body>";
827
828 parser.parse( text, sink );
829
830 Iterator<SinkEventElement> it = sink.getEventList().iterator();
831 assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
832 "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
833 assertStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
834 "definitionListItem_", "definitionList_" );
835 assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
836 "definitionListItem_", "definitionList_" );
837 assertStartsWith( it, "definitionList", "definitionList_" );
838 assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
839 "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
840 "definitionListItem_", "definitionList_" );
841 }
842 }