1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.util.Iterator;
23
24 import org.apache.maven.doxia.logging.Log;
25 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
26 import org.apache.maven.doxia.sink.impl.SinkEventElement;
27 import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
28
29
30
31
32 public class Xhtml5BaseParserTest
33 extends AbstractParserTest
34 {
35 private Xhtml5BaseParser parser;
36 private final SinkEventTestingSink sink = new SinkEventTestingSink();
37
38
39 @Override
40 protected Parser createParser()
41 {
42 parser = new Xhtml5BaseParser();
43 parser.getLog().setLogLevel( Log.LEVEL_ERROR );
44 return parser;
45 }
46
47 @Override
48 protected String outputExtension()
49 {
50 return "xhtml";
51 }
52
53 @Override
54 protected void setUp() throws Exception
55 {
56 super.setUp();
57
58 parser = new Xhtml5BaseParser();
59 parser.getLog().setLogLevel( Log.LEVEL_ERROR );
60 sink.reset();
61 }
62
63
64 public void testDoxiaVersion()
65 {
66 assertNotNull( XhtmlBaseParser.doxiaVersion() );
67 assertFalse( "unknown".equals( XhtmlBaseParser.doxiaVersion() ) );
68 }
69
70
71 public void testHeadingEventsList()
72 throws Exception
73 {
74 String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
75
76 parser.parse( text, sink );
77
78 Iterator<SinkEventElement> it = sink.getEventList().iterator();
79
80 assertEquals( "paragraph", it.next().getName() );
81 assertEquals( "section1", it.next().getName() );
82 assertEquals( "sectionTitle1", it.next().getName() );
83 assertEquals( "sectionTitle1_", it.next().getName() );
84 assertEquals( "section2", it.next().getName() );
85 assertEquals( "sectionTitle2", it.next().getName() );
86 assertEquals( "sectionTitle2_", it.next().getName() );
87 assertEquals( "section3", it.next().getName() );
88 assertEquals( "sectionTitle3", it.next().getName() );
89 assertEquals( "sectionTitle3_", it.next().getName() );
90 assertEquals( "section4", it.next().getName() );
91 assertEquals( "sectionTitle4", it.next().getName() );
92 assertEquals( "sectionTitle4_", it.next().getName() );
93 assertEquals( "section5", it.next().getName() );
94 assertEquals( "sectionTitle5", it.next().getName() );
95 assertEquals( "sectionTitle5_", it.next().getName() );
96 assertEquals( "section5_", it.next().getName() );
97 assertEquals( "section4_", it.next().getName() );
98 assertEquals( "section3_", it.next().getName() );
99 assertEquals( "section2_", it.next().getName() );
100 assertEquals( "section1_", it.next().getName() );
101 assertEquals( "section1", it.next().getName() );
102 assertEquals( "sectionTitle1", it.next().getName() );
103 assertEquals( "sectionTitle1_", it.next().getName() );
104
105
106
107 assertEquals( "paragraph_", it.next().getName() );
108 assertFalse( it.hasNext() );
109 }
110
111
112 public void testNestedHeadingEventsList()
113 throws Exception
114 {
115
116 String text = "<p><h2></h2><h6></h6><h3></h3></p>";
117
118 parser.parse( text, sink );
119
120 Iterator<SinkEventElement> it = sink.getEventList().iterator();
121
122 assertEquals( "paragraph", it.next().getName() );
123 assertEquals( "section1", it.next().getName() );
124 assertEquals( "sectionTitle1", it.next().getName() );
125 assertEquals( "sectionTitle1_", it.next().getName() );
126
127 assertEquals( "section2", it.next().getName() );
128 assertEquals( "section3", it.next().getName() );
129 assertEquals( "section4", it.next().getName() );
130
131 assertEquals( "section5", it.next().getName() );
132 assertEquals( "sectionTitle5", it.next().getName() );
133 assertEquals( "sectionTitle5_", it.next().getName() );
134 assertEquals( "section5_", it.next().getName() );
135
136 assertEquals( "section4_", it.next().getName() );
137 assertEquals( "section3_", it.next().getName() );
138 assertEquals( "section2_", it.next().getName() );
139
140 assertEquals( "section2", it.next().getName() );
141 assertEquals( "sectionTitle2", it.next().getName() );
142 assertEquals( "sectionTitle2_", it.next().getName() );
143
144
145
146
147 assertEquals( "paragraph_", it.next().getName() );
148 assertFalse( it.hasNext() );
149 }
150
151
152 public void testFigureEventsList()
153 throws Exception
154 {
155 String text = "<img src=\"source\" title=\"caption\" />";
156
157 parser.parse( text, sink );
158
159 Iterator<SinkEventElement> it = sink.getEventList().iterator();
160
161 assertEquals( "figureGraphics", it.next().getName() );
162 assertFalse( it.hasNext() );
163 }
164
165
166 public void testTableEventsList()
167 throws Exception
168 {
169
170
171 String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
172
173 parser.parse( text, sink );
174
175 Iterator<SinkEventElement> it = sink.getEventList().iterator();
176
177 assertEquals( "table", it.next().getName() );
178 assertEquals( "tableRows", it.next().getName() );
179 assertEquals( "tableRow", it.next().getName() );
180 assertEquals( "tableHeaderCell", it.next().getName() );
181 assertEquals( "text", it.next().getName() );
182 assertEquals( "tableHeaderCell_", it.next().getName() );
183 assertEquals( "tableRow_", it.next().getName() );
184 assertEquals( "tableRow", it.next().getName() );
185 assertEquals( "tableCell", it.next().getName() );
186 assertEquals( "text", it.next().getName() );
187 assertEquals( "tableCell_", it.next().getName() );
188 assertEquals( "tableRow_", it.next().getName() );
189 assertEquals( "tableRows_", it.next().getName() );
190 assertEquals( "table_", it.next().getName() );
191
192 assertFalse( it.hasNext() );
193 }
194
195
196 public void testSignificantWhiteSpace()
197 throws Exception
198 {
199
200 String text = "<p><b>word</b> <i>word</i></p>";
201
202 parser.parse( text, sink );
203
204 Iterator<SinkEventElement> it = sink.getEventList().iterator();
205
206 assertEquals( "paragraph", it.next().getName() );
207 assertEquals( "inline", it.next().getName() );
208 assertEquals( "text", it.next().getName() );
209 assertEquals( "inline_", it.next().getName() );
210
211 SinkEventElement el = it.next();
212 assertEquals( "text", el.getName() );
213 assertEquals( " ", (String) el.getArgs()[0] );
214
215 assertEquals( "inline", it.next().getName() );
216 assertEquals( "text", it.next().getName() );
217 assertEquals( "inline_", it.next().getName() );
218 assertEquals( "paragraph_", it.next().getName() );
219 assertFalse( it.hasNext() );
220
221
222
223 String eol = System.getProperty( "line.separator" );
224 text = "<p><b>word</b>" + eol + "<i>word</i></p>";
225
226 sink.reset();
227 parser.parse( text, sink );
228 it = sink.getEventList().iterator();
229
230 assertEquals( "paragraph", it.next().getName() );
231 assertEquals( "inline", it.next().getName() );
232 assertEquals( "text", it.next().getName() );
233 assertEquals( "inline_", it.next().getName() );
234
235 el = it.next();
236 assertEquals( "text", el.getName() );
237
238 assertEquals( "\n", (String) el.getArgs()[0] );
239
240 assertEquals( "inline", it.next().getName() );
241 assertEquals( "text", it.next().getName() );
242 assertEquals( "inline_", it.next().getName() );
243 assertEquals( "paragraph_", it.next().getName() );
244 assertFalse( it.hasNext() );
245
246
247
248 text = "<p>There should be no space after the last <i>word</i>.</p>";
249
250 sink.reset();
251 parser.parse( text, sink );
252 it = sink.getEventList().iterator();
253
254 assertEquals( "paragraph", it.next().getName() );
255 assertEquals( "text", it.next().getName() );
256 assertEquals( "inline", it.next().getName() );
257 assertEquals( "text", it.next().getName() );
258 assertEquals( "inline_", it.next().getName() );
259
260 el = it.next();
261 assertEquals( "text", el.getName() );
262 assertEquals( ".", (String) el.getArgs()[0] );
263
264 assertEquals( "paragraph_", it.next().getName() );
265 assertFalse( it.hasNext() );
266 }
267
268
269 public void testPreFormattedText()
270 throws Exception
271 {
272 String text = "<pre><a href=\"what.html\">what</a></pre>";
273
274 parser.parse( text, sink );
275
276 Iterator<SinkEventElement> it = sink.getEventList().iterator();
277 assertEquals( "verbatim", it.next().getName() );
278 assertEquals( "link", it.next().getName() );
279 assertEquals( "text", it.next().getName() );
280 assertEquals( "link_", it.next().getName() );
281 assertEquals( "verbatim_", it.next().getName() );
282 assertFalse( it.hasNext() );
283
284 text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
285 sink.reset();
286 parser.parse( text, sink );
287
288 it = sink.getEventList().iterator();
289 assertEquals( "verbatim", it.next().getName() );
290 assertEquals( "text", it.next().getName() );
291 assertEquals( "verbatim_", it.next().getName() );
292 assertFalse( it.hasNext() );
293
294 text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
295 sink.reset();
296 parser.parse( text, sink );
297
298 it = sink.getEventList().iterator();
299 assertEquals( "verbatim", it.next().getName() );
300 assertEquals( "text", it.next().getName() );
301 assertEquals( "verbatim_", it.next().getName() );
302 assertFalse( it.hasNext() );
303 }
304
305
306 public void testPreEOL()
307 throws Exception
308 {
309
310 String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
311 + "<a href=\"what.html\">what</a></pre>";
312
313 parser.parse( text, sink );
314
315 Iterator<SinkEventElement> it = sink.getEventList().iterator();
316
317 assertEquals( "verbatim", it.next().getName() );
318 assertEquals( "link", it.next().getName() );
319 assertEquals( "text", it.next().getName() );
320 assertEquals( "link_", it.next().getName() );
321 assertEquals( "text", it.next().getName() );
322 assertEquals( "link", it.next().getName() );
323 assertEquals( "text", it.next().getName() );
324 assertEquals( "link_", it.next().getName() );
325 assertEquals( "verbatim_", it.next().getName() );
326 }
327
328
329 public void testDoxia250()
330 throws Exception
331 {
332 StringBuilder sb = new StringBuilder();
333 sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
334 sb.append( "<!ENTITY foo \"ř\">" ).append( XhtmlBaseParser.EOL );
335 sb.append( "<!ENTITY foo1 \" \">" ).append( XhtmlBaseParser.EOL );
336 sb.append( "<!ENTITY foo2 \"š\">" ).append( XhtmlBaseParser.EOL );
337 sb.append( "<!ENTITY tritPos \"𝟭\">" ).append( XhtmlBaseParser.EOL );
338 sb.append( "]>" ).append( XhtmlBaseParser.EOL );
339 sb.append( "<p>&foo;&foo1;&foo2;&tritPos;</p>" );
340
341 parser.setValidate( false );
342 parser.parse( sb.toString(), sink );
343
344 Iterator<SinkEventElement> it = sink.getEventList().iterator();
345
346 SinkEventElement event = it.next();
347 assertEquals( "paragraph", event.getName() );
348
349 event = it.next();
350 assertEquals( "text", event.getName() );
351 assertEquals( "\u0159", (String) event.getArgs()[0] );
352
353 event = it.next();
354 assertEquals( "text", event.getName() );
355 assertEquals( "\u00A0", (String) event.getArgs()[0] );
356
357 event = it.next();
358 assertEquals( "text", event.getName() );
359 assertEquals( "\u0161", (String) event.getArgs()[0] );
360
361 event = it.next();
362 assertEquals( "text", event.getName() );
363 assertEquals( "\uD835\uDFED", (String) event.getArgs()[0] );
364
365 event = it.next();
366 assertEquals( "paragraph_", event.getName() );
367 }
368
369
370 public void testEntities()
371 throws Exception
372 {
373 final String text = "<!DOCTYPE test [<!ENTITY flo \"ř\"><!ENTITY tritPos \"𝟭\"><!ENTITY fo \"A\"><!ENTITY myCustom \"&fo;\">]>"
374 + "<body><h2>&&flo;ř&tritPos;𝟭</h2><p>&&flo;ř&tritPos;𝟭&myCustom;</p></body>";
375
376 parser.setValidate( false );
377 parser.parse( text, sink );
378
379 Iterator<SinkEventElement> it = sink.getEventList().iterator();
380
381 assertEquals( "section1", it.next().getName() );
382 assertEquals( "sectionTitle1", it.next().getName() );
383
384 SinkEventElement textEvt = it.next();
385 assertEquals( "text", textEvt.getName() );
386 assertEquals( "&", textEvt.getArgs()[0] );
387
388 textEvt = it.next();
389 assertEquals( "text", textEvt.getName() );
390 assertEquals( "\u0159", textEvt.getArgs()[0] );
391
392 textEvt = it.next();
393 assertEquals( "text", textEvt.getName() );
394 assertEquals( "\u0159", textEvt.getArgs()[0] );
395
396 textEvt = it.next();
397 assertEquals( "text", textEvt.getName() );
398 assertEquals( "\uD835\uDFED", (String) textEvt.getArgs()[0] );
399
400 textEvt = it.next();
401 assertEquals( "text", textEvt.getName() );
402 assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
403
404 assertEquals( "sectionTitle1_", it.next().getName() );
405 assertEquals( "paragraph", it.next().getName() );
406
407 textEvt = it.next();
408 assertEquals( "text", textEvt.getName() );
409 assertEquals( "&", textEvt.getArgs()[0] );
410
411 textEvt = it.next();
412 assertEquals( "text", textEvt.getName() );
413 assertEquals( "\u0159", textEvt.getArgs()[0] );
414
415 textEvt = it.next();
416 assertEquals( "text", textEvt.getName() );
417 assertEquals( "\u0159", textEvt.getArgs()[0] );
418
419 textEvt = it.next();
420 assertEquals( "text", textEvt.getName() );
421 assertEquals( "\uD835\uDFED", (String) textEvt.getArgs()[0] );
422
423 textEvt = it.next();
424 assertEquals( "text", textEvt.getName() );
425 assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
426
427 textEvt = it.next();
428 assertEquals( "text", textEvt.getName() );
429 assertEquals( "A", textEvt.getArgs()[0] );
430
431 assertEquals( "paragraph_", it.next().getName() );
432
433
434
435 assertFalse( it.hasNext() );
436 }
437
438
439 public void testXhtmlEntities()
440 throws Exception
441 {
442 final String text = "<body><h2>"&</h2><p>'<></p></body>";
443
444 parser.parse( text, sink );
445
446 Iterator<SinkEventElement> it = sink.getEventList().iterator();
447
448 assertEquals( "section1", it.next().getName() );
449 assertEquals( "sectionTitle1", it.next().getName() );
450
451 SinkEventElement textEvt = it.next();
452 assertEquals( "text", textEvt.getName() );
453 assertEquals( "\"", textEvt.getArgs()[0] );
454
455 textEvt = it.next();
456 assertEquals( "text", textEvt.getName() );
457 assertEquals( "&", textEvt.getArgs()[0] );
458
459 assertEquals( "sectionTitle1_", it.next().getName() );
460 assertEquals( "paragraph", it.next().getName() );
461
462 textEvt = it.next();
463 assertEquals( "text", textEvt.getName() );
464 assertEquals( "\'", textEvt.getArgs()[0] );
465
466 textEvt = it.next();
467 assertEquals( "text", textEvt.getName() );
468 assertEquals( "<", textEvt.getArgs()[0] );
469
470 textEvt = it.next();
471 assertEquals( "text", textEvt.getName() );
472 assertEquals( ">", textEvt.getArgs()[0] );
473
474 assertEquals( "paragraph_", it.next().getName() );
475
476 assertFalse( it.hasNext() );
477 }
478
479
480 public void testLists()
481 throws Exception
482 {
483 String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
484 parser.parse( text, sink );
485 Iterator<SinkEventElement> it = sink.getEventList().iterator();
486
487 assertEquals( "division", it.next().getName() );
488 assertEquals( "list", it.next().getName() );
489 assertEquals( "listItem", it.next().getName() );
490 assertEquals( "listItem_", it.next().getName() );
491 assertEquals( "list_", it.next().getName() );
492
493 assertEquals( "numberedList", it.next().getName() );
494 assertEquals( "numberedListItem", it.next().getName() );
495 assertEquals( "numberedListItem_", it.next().getName() );
496 assertEquals( "numberedList_", it.next().getName() );
497
498 assertEquals( "definitionList", it.next().getName() );
499 assertEquals( "definitionListItem", it.next().getName() );
500 assertEquals( "definedTerm", it.next().getName() );
501 assertEquals( "definedTerm_", it.next().getName() );
502 assertEquals( "definition", it.next().getName() );
503 assertEquals( "definition_", it.next().getName() );
504 assertEquals( "definitionListItem_", it.next().getName() );
505 assertEquals( "definitionList_", it.next().getName() );
506 assertEquals( "division_", it.next().getName() );
507 }
508
509
510 public void testSimpleTags()
511 throws Exception
512 {
513 String text = "<div><br /><wbr /><hr /><img src=\"img.src\"/></div>";
514 parser.parse( text, sink );
515 Iterator<SinkEventElement> it = sink.getEventList().iterator();
516
517 assertEquals( "division", it.next().getName() );
518 assertEquals( "lineBreak", it.next().getName() );
519 assertEquals( "lineBreakOpportunity", it.next().getName() );
520 assertEquals( "horizontalRule", it.next().getName() );
521 assertEquals( "figureGraphics", it.next().getName() );
522 assertEquals( "division_", it.next().getName() );
523 }
524
525
526 public void testSemanticTags()
527 throws Exception
528 {
529 String text = "<em><strong><small><s><cite><q><dfn><abbr><i><b><code><var><samp><kbd><sup><sub><u><mark><ruby><rb><rt><rtc><rp><bdi><bdo><span><ins><del>a text & Æ</del></ins></span></bdo></bdi></rp></rtc></rt></rb></ruby></mark></u></sub></sup></kbd></samp></var></code></b></i></abbr></dfn></q></cite></s></small></strong></em>";
530 parser.parse( text, sink );
531 Iterator<SinkEventElement> it = sink.getEventList().iterator();
532
533 SinkEventElement event = it.next();
534 assertEquals( "inline", event.getName() );
535 assertEquals( "semantics=emphasis", event.getArgs()[0].toString().trim() );
536
537 event = it.next();
538 assertEquals( "inline", event.getName() );
539 assertEquals( "semantics=strong", event.getArgs()[0].toString().trim() );
540
541 event = it.next();
542 assertEquals( "inline", event.getName() );
543 assertEquals( "semantics=small", event.getArgs()[0].toString().trim() );
544
545 event = it.next();
546 assertEquals( "inline", event.getName() );
547 assertEquals( "semantics=line-through", event.getArgs()[0].toString().trim() );
548
549 event = it.next();
550 assertEquals( "inline", event.getName() );
551 assertEquals( "semantics=citation", event.getArgs()[0].toString().trim() );
552
553 event = it.next();
554 assertEquals( "inline", event.getName() );
555 assertEquals( "semantics=quote", event.getArgs()[0].toString().trim() );
556
557 event = it.next();
558 assertEquals( "inline", event.getName() );
559 assertEquals( "semantics=definition", event.getArgs()[0].toString().trim() );
560
561 event = it.next();
562 assertEquals( "inline", event.getName() );
563 assertEquals( "semantics=abbreviation", event.getArgs()[0].toString().trim() );
564
565 event = it.next();
566 assertEquals( "inline", event.getName() );
567 assertEquals( "semantics=italic", event.getArgs()[0].toString().trim() );
568
569 event = it.next();
570 assertEquals( "inline", event.getName() );
571 assertEquals( "semantics=bold", event.getArgs()[0].toString().trim() );
572
573 event = it.next();
574 assertEquals( "inline", event.getName() );
575 assertEquals( "semantics=code", event.getArgs()[0].toString().trim() );
576
577 event = it.next();
578 assertEquals( "inline", event.getName() );
579 assertEquals( "semantics=variable", event.getArgs()[0].toString().trim() );
580
581 event = it.next();
582 assertEquals( "inline", event.getName() );
583 assertEquals( "semantics=sample", event.getArgs()[0].toString().trim() );
584
585 event = it.next();
586 assertEquals( "inline", event.getName() );
587 assertEquals( "semantics=keyboard", event.getArgs()[0].toString().trim() );
588
589 event = it.next();
590 assertEquals( "inline", event.getName() );
591 assertEquals( "semantics=superscript", event.getArgs()[0].toString().trim() );
592
593 event = it.next();
594 assertEquals( "inline", event.getName() );
595 assertEquals( "semantics=subscript", event.getArgs()[0].toString().trim() );
596
597 event = it.next();
598 assertEquals( "inline", event.getName() );
599 assertEquals( "semantics=annotation", event.getArgs()[0].toString().trim() );
600
601 event = it.next();
602 assertEquals( "inline", event.getName() );
603 assertEquals( "semantics=highlight", event.getArgs()[0].toString().trim() );
604
605 event = it.next();
606 assertEquals( "inline", event.getName() );
607 assertEquals( "semantics=ruby", event.getArgs()[0].toString().trim() );
608
609 event = it.next();
610 assertEquals( "inline", event.getName() );
611 assertEquals( "semantics=rubyBase", event.getArgs()[0].toString().trim() );
612
613 event = it.next();
614 assertEquals( "inline", event.getName() );
615 assertEquals( "semantics=rubyText", event.getArgs()[0].toString().trim() );
616
617 event = it.next();
618 assertEquals( "inline", event.getName() );
619 assertEquals( "semantics=rubyTextContainer", event.getArgs()[0].toString().trim() );
620
621 event = it.next();
622 assertEquals( "inline", event.getName() );
623 assertEquals( "semantics=rubyParentheses", event.getArgs()[0].toString().trim() );
624
625 event = it.next();
626 assertEquals( "inline", event.getName() );
627 assertEquals( "semantics=bidirectionalIsolation", event.getArgs()[0].toString().trim() );
628
629 event = it.next();
630 assertEquals( "inline", event.getName() );
631 assertEquals( "semantics=bidirectionalOverride", event.getArgs()[0].toString().trim() );
632
633 event = it.next();
634 assertEquals( "inline", event.getName() );
635 assertEquals( "semantics=phrase", event.getArgs()[0].toString().trim() );
636
637 event = it.next();
638 assertEquals( "inline", event.getName() );
639 assertEquals( "semantics=insert", event.getArgs()[0].toString().trim() );
640
641 event = it.next();
642 assertEquals( "inline", event.getName() );
643 assertEquals( "semantics=delete", event.getArgs()[0].toString().trim() );
644
645 assertEquals( "text", it.next().getName() );
646 assertEquals( "text", it.next().getName() );
647 assertEquals( "text", it.next().getName() );
648 assertEquals( "text", it.next().getName() );
649
650 assertEquals( "inline_", it.next().getName() );
651 assertEquals( "inline_", it.next().getName() );
652 assertEquals( "inline_", it.next().getName() );
653 assertEquals( "inline_", it.next().getName() );
654 assertEquals( "inline_", it.next().getName() );
655 assertEquals( "inline_", it.next().getName() );
656 assertEquals( "inline_", it.next().getName() );
657 assertEquals( "inline_", it.next().getName() );
658 assertEquals( "inline_", it.next().getName() );
659 assertEquals( "inline_", it.next().getName() );
660 assertEquals( "inline_", it.next().getName() );
661 assertEquals( "inline_", it.next().getName() );
662 assertEquals( "inline_", it.next().getName() );
663 assertEquals( "inline_", it.next().getName() );
664 assertEquals( "inline_", it.next().getName() );
665 assertEquals( "inline_", it.next().getName() );
666 assertEquals( "inline_", it.next().getName() );
667 assertEquals( "inline_", it.next().getName() );
668 assertEquals( "inline_", it.next().getName() );
669 assertEquals( "inline_", it.next().getName() );
670 assertEquals( "inline_", it.next().getName() );
671 assertEquals( "inline_", it.next().getName() );
672 assertEquals( "inline_", it.next().getName() );
673 assertEquals( "inline_", it.next().getName() );
674 assertEquals( "inline_", it.next().getName() );
675 assertEquals( "inline_", it.next().getName() );
676 assertEquals( "inline_", it.next().getName() );
677 assertEquals( "inline_", it.next().getName() );
678
679 }
680
681
682 public void testSpecial()
683 throws Exception
684 {
685 String text = "<p><!-- a pagebreak: --><!-- PB -->  <unknown /></p>";
686 parser.parse( text, sink );
687 Iterator<SinkEventElement> it = sink.getEventList().iterator();
688
689 assertEquals( "paragraph", it.next().getName() );
690 assertEquals( "comment", it.next().getName() );
691 assertEquals( "pageBreak", it.next().getName() );
692 assertEquals( "nonBreakingSpace", it.next().getName() );
693 assertEquals( "nonBreakingSpace", it.next().getName() );
694
695 assertEquals( "paragraph_", it.next().getName() );
696 }
697
698
699 public void testTable()
700 throws Exception
701 {
702 String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
703 parser.parse( text, sink );
704 Iterator<SinkEventElement> it = sink.getEventList().iterator();
705
706 assertEquals( "table", it.next().getName() );
707
708
709 SinkEventElement el = it.next();
710 assertEquals( "tableRows", el.getName() );
711 assertFalse( (Boolean) el.getArgs()[1] );
712
713 assertEquals( "tableCaption", it.next().getName() );
714 assertEquals( "tableCaption_", it.next().getName() );
715 assertEquals( "tableRow", it.next().getName() );
716 assertEquals( "tableHeaderCell", it.next().getName() );
717 assertEquals( "tableHeaderCell_", it.next().getName() );
718 assertEquals( "tableRow_", it.next().getName() );
719 assertEquals( "tableRow", it.next().getName() );
720 assertEquals( "tableCell", it.next().getName() );
721 assertEquals( "tableCell_", it.next().getName() );
722 assertEquals( "tableRow_", it.next().getName() );
723 assertEquals( "tableRows_", it.next().getName() );
724 assertEquals( "table_", it.next().getName() );
725 }
726
727
728 public void testFigure()
729 throws Exception
730 {
731 String text = "<figure><img src=\"src.jpg\"/><figcaption></figcaption></figure>";
732 parser.parse( text, sink );
733 Iterator<SinkEventElement> it = sink.getEventList().iterator();
734
735 assertEquals( "figure", it.next().getName() );
736 assertEquals( "figureGraphics", it.next().getName() );
737 assertEquals( "figureCaption", it.next().getName() );
738 assertEquals( "figureCaption_", it.next().getName() );
739 assertEquals( "figure_", it.next().getName() );
740 }
741
742
743 public void testAnchorLink()
744 throws Exception
745 {
746 String text = "<div><a href=\"\"></a>" +
747 "<a href=\"valid\"></a>" +
748 "<a href=\"#1invalid\"></a>" +
749 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
750 "<a name=\"valid\"></a>" +
751 "<a name=\"1invalid\"></a>" +
752 "<a id=\"1invalid\"></a></div>";
753
754 parser.parse( text, sink );
755 Iterator<SinkEventElement> it = sink.getEventList().iterator();
756
757 SinkEventElement element = it.next();
758 assertEquals( "division", element.getName() );
759
760 element = it.next();
761 assertEquals( "link", element.getName() );
762 assertEquals( "", element.getArgs()[0] );
763 assertEquals( "link_", it.next().getName() );
764
765 element = it.next();
766 assertEquals( "link", element.getName() );
767 assertEquals( "valid", element.getArgs()[0] );
768 assertEquals( "link_", it.next().getName() );
769
770 element = it.next();
771 assertEquals( "link", element.getName() );
772 assertEquals( "#a1invalid", element.getArgs()[0] );
773 assertEquals( "link_", it.next().getName() );
774
775 element = it.next();
776 assertEquals( "link", element.getName() );
777 assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
778 assertEquals( "link_", it.next().getName() );
779
780 element = it.next();
781 assertEquals( "anchor", element.getName() );
782 assertEquals( "valid", element.getArgs()[0] );
783 assertEquals( "anchor_", it.next().getName() );
784
785 element = it.next();
786 assertEquals( "anchor", element.getName() );
787 assertEquals( "a1invalid", element.getArgs()[0] );
788 assertEquals( "anchor_", it.next().getName() );
789
790 element = it.next();
791 assertEquals( "anchor", element.getName() );
792 assertEquals( "a1invalid", element.getArgs()[0] );
793 assertEquals( "anchor_", it.next().getName() );
794
795 element = it.next();
796 assertEquals( "division_", element.getName() );
797 }
798
799
800
801
802
803
804 public void testAttributeEntities()
805 throws Exception
806 {
807 String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&l=e\"></script>";
808
809 parser.parse( text, sink );
810
811 Iterator<SinkEventElement> it = sink.getEventList().iterator();
812
813 SinkEventElement event = it.next();
814
815 assertEquals( "unknown", event.getName() );
816 assertEquals( "script", event.getArgs()[0] );
817 SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
818
819 assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
820 assertEquals( "unknown", it.next().getName() );
821 assertFalse( it.hasNext() );
822
823 sink.reset();
824 text = "<img src=\"http://ex.com/ex.jpg?v=l&l=e\" alt=\"image\"/>";
825 parser.parse( text, sink );
826
827 it = sink.getEventList().iterator();
828 event = it.next();
829 assertEquals( "figureGraphics", event.getName() );
830 attribs = (SinkEventAttributeSet) event.getArgs()[1];
831
832 assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
833 }
834
835 public void testUnbalancedDefinitionListItem() throws Exception
836 {
837 String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
838 "<dl><dd>value</dd></dl>" +
839 "<dl><dt>key</dt></dl>" +
840 "<dl></dl>" +
841 "<dl><dd>value</dd><dt>key</dt></dl></body>";
842
843 parser.parse( text, sink );
844
845 Iterator<SinkEventElement> it = sink.getEventList().iterator();
846 assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
847 "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
848 assertStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
849 "definitionListItem_", "definitionList_" );
850 assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
851 "definitionListItem_", "definitionList_" );
852 assertStartsWith( it, "definitionList", "definitionList_" );
853 assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
854 "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
855 "definitionListItem_", "definitionList_" );
856 }
857 }