1 | |
package org.apache.maven.doxia.parser; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.Reader; |
23 | |
import java.util.HashMap; |
24 | |
import java.util.Map; |
25 | |
import java.util.Set; |
26 | |
import java.util.TreeSet; |
27 | |
|
28 | |
import javax.swing.text.html.HTML.Attribute; |
29 | |
|
30 | |
import org.apache.maven.doxia.macro.MacroExecutionException; |
31 | |
import org.apache.maven.doxia.markup.HtmlMarkup; |
32 | |
import org.apache.maven.doxia.sink.Sink; |
33 | |
import org.apache.maven.doxia.sink.SinkEventAttributeSet; |
34 | |
import org.apache.maven.doxia.sink.SinkEventAttributes; |
35 | |
import org.apache.maven.doxia.util.DoxiaUtils; |
36 | |
|
37 | |
import org.codehaus.plexus.util.StringUtils; |
38 | |
import org.codehaus.plexus.util.xml.pull.XmlPullParser; |
39 | |
import org.codehaus.plexus.util.xml.pull.XmlPullParserException; |
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | 44 | public class XhtmlBaseParser |
50 | |
extends AbstractXmlParser |
51 | |
implements HtmlMarkup |
52 | |
{ |
53 | |
|
54 | |
private boolean scriptBlock; |
55 | |
|
56 | |
|
57 | |
private boolean isLink; |
58 | |
|
59 | |
|
60 | |
private boolean isAnchor; |
61 | |
|
62 | |
|
63 | 44 | private int orderedListDepth = 0; |
64 | |
|
65 | |
|
66 | |
private int sectionLevel; |
67 | |
|
68 | |
|
69 | |
private boolean inVerbatim; |
70 | |
|
71 | |
|
72 | |
private boolean inFigure; |
73 | |
|
74 | |
|
75 | 44 | private final SinkEventAttributeSet decoration = new SinkEventAttributeSet(); |
76 | |
|
77 | |
|
78 | |
|
79 | |
private Map<String, Set<String>> warnMessages; |
80 | |
|
81 | |
|
82 | |
@Override |
83 | |
public void parse( Reader source, Sink sink ) |
84 | |
throws ParseException |
85 | |
{ |
86 | 52 | init(); |
87 | |
|
88 | |
try |
89 | |
{ |
90 | 52 | super.parse( source, sink ); |
91 | |
} |
92 | |
finally |
93 | |
{ |
94 | 52 | logWarnings(); |
95 | |
|
96 | 52 | setSecondParsing( false ); |
97 | 52 | init(); |
98 | 52 | } |
99 | 52 | } |
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | |
|
105 | |
|
106 | |
|
107 | |
|
108 | |
|
109 | |
|
110 | |
|
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
protected boolean baseStartTag( XmlPullParser parser, Sink sink ) |
123 | |
{ |
124 | 214 | boolean visited = true; |
125 | |
|
126 | 214 | SinkEventAttributeSet attribs = getAttributesFromParser( parser ); |
127 | |
|
128 | 214 | if ( parser.getName().equals( HtmlMarkup.H2.toString() ) ) |
129 | |
{ |
130 | 26 | handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs ); |
131 | |
} |
132 | 188 | else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) ) |
133 | |
{ |
134 | 10 | handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs ); |
135 | |
} |
136 | 178 | else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) ) |
137 | |
{ |
138 | 8 | handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs ); |
139 | |
} |
140 | 170 | else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) ) |
141 | |
{ |
142 | 2 | handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs ); |
143 | |
} |
144 | 168 | else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) ) |
145 | |
{ |
146 | 4 | handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs ); |
147 | |
} |
148 | 164 | else if ( parser.getName().equals( HtmlMarkup.U.toString() ) ) |
149 | |
{ |
150 | 2 | decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" ); |
151 | |
} |
152 | 162 | else if ( parser.getName().equals( HtmlMarkup.S.toString() ) |
153 | |
|| parser.getName().equals( HtmlMarkup.STRIKE.toString() ) |
154 | |
|| parser.getName().equals( "del" ) ) |
155 | |
{ |
156 | 6 | decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" ); |
157 | |
} |
158 | 156 | else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) ) |
159 | |
{ |
160 | 2 | decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" ); |
161 | |
} |
162 | 154 | else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) ) |
163 | |
{ |
164 | 2 | decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" ); |
165 | |
} |
166 | 152 | else if ( parser.getName().equals( HtmlMarkup.P.toString() ) ) |
167 | |
{ |
168 | 18 | handlePStart( sink, attribs ); |
169 | |
} |
170 | 134 | else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) ) |
171 | |
{ |
172 | 18 | visited = handleDivStart( parser, attribs, sink ); |
173 | |
} |
174 | 116 | else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) ) |
175 | |
{ |
176 | 8 | handlePreStart( attribs, sink ); |
177 | |
} |
178 | 108 | else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) ) |
179 | |
{ |
180 | 2 | sink.list( attribs ); |
181 | |
} |
182 | 106 | else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) ) |
183 | |
{ |
184 | 2 | handleOLStart( parser, sink, attribs ); |
185 | |
} |
186 | 104 | else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) ) |
187 | |
{ |
188 | 4 | handleLIStart( sink, attribs ); |
189 | |
} |
190 | 100 | else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) ) |
191 | |
{ |
192 | 2 | sink.definitionList( attribs ); |
193 | |
} |
194 | 98 | else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) ) |
195 | |
{ |
196 | 2 | sink.definitionListItem( attribs ); |
197 | 2 | sink.definedTerm( attribs ); |
198 | |
} |
199 | 96 | else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) ) |
200 | |
{ |
201 | 2 | sink.definition( attribs ); |
202 | |
} |
203 | 94 | else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) ) |
204 | |
|| ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) ) |
205 | |
{ |
206 | 12 | sink.bold(); |
207 | |
} |
208 | 82 | else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) ) |
209 | |
|| ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) ) |
210 | |
{ |
211 | 14 | handleFigureCaptionStart( sink, attribs ); |
212 | |
} |
213 | 68 | else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) ) |
214 | |
|| ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) ) |
215 | |
|| ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) ) |
216 | |
{ |
217 | 8 | sink.monospaced(); |
218 | |
} |
219 | 60 | else if ( parser.getName().equals( HtmlMarkup.A.toString() ) ) |
220 | |
{ |
221 | 20 | handleAStart( parser, sink, attribs ); |
222 | |
} |
223 | 40 | else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) ) |
224 | |
{ |
225 | 4 | handleTableStart( sink, attribs, parser ); |
226 | |
} |
227 | 36 | else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) ) |
228 | |
{ |
229 | 8 | sink.tableRow( attribs ); |
230 | |
} |
231 | 28 | else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) ) |
232 | |
{ |
233 | 4 | sink.tableHeaderCell( attribs ); |
234 | |
} |
235 | 24 | else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) ) |
236 | |
{ |
237 | 4 | sink.tableCell( attribs ); |
238 | |
} |
239 | 20 | else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) ) |
240 | |
{ |
241 | 2 | sink.tableCaption( attribs ); |
242 | |
} |
243 | 18 | else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) ) |
244 | |
{ |
245 | 2 | sink.lineBreak( attribs ); |
246 | |
} |
247 | 16 | else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) ) |
248 | |
{ |
249 | 2 | sink.horizontalRule( attribs ); |
250 | |
} |
251 | 14 | else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) ) |
252 | |
{ |
253 | 8 | handleImgStart( parser, sink, attribs ); |
254 | |
} |
255 | 6 | else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) ) |
256 | |
{ |
257 | 2 | handleUnknown( parser, sink, TAG_TYPE_START ); |
258 | 2 | scriptBlock = true; |
259 | |
} |
260 | |
else |
261 | |
{ |
262 | 4 | visited = false; |
263 | |
} |
264 | |
|
265 | 214 | return visited; |
266 | |
} |
267 | |
|
268 | |
|
269 | |
|
270 | |
|
271 | |
|
272 | |
|
273 | |
|
274 | |
|
275 | |
|
276 | |
|
277 | |
|
278 | |
|
279 | |
|
280 | |
protected boolean baseEndTag( XmlPullParser parser, Sink sink ) |
281 | |
{ |
282 | 214 | boolean visited = true; |
283 | |
|
284 | 214 | if ( parser.getName().equals( HtmlMarkup.P.toString() ) ) |
285 | |
{ |
286 | 18 | if ( !inFigure ) |
287 | |
{ |
288 | 14 | sink.paragraph_(); |
289 | |
} |
290 | |
} |
291 | 196 | else if ( parser.getName().equals( HtmlMarkup.U.toString() ) |
292 | |
|| parser.getName().equals( HtmlMarkup.S.toString() ) |
293 | |
|| parser.getName().equals( HtmlMarkup.STRIKE.toString() ) |
294 | |
|| parser.getName().equals( "del" ) ) |
295 | |
{ |
296 | 8 | decoration.removeAttribute( SinkEventAttributes.DECORATION ); |
297 | |
} |
298 | 188 | else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) |
299 | |
|| parser.getName().equals( HtmlMarkup.SUP.toString() ) ) |
300 | |
{ |
301 | 4 | decoration.removeAttribute( SinkEventAttributes.VALIGN ); |
302 | |
} |
303 | 184 | else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) ) |
304 | |
{ |
305 | 18 | if ( inFigure ) |
306 | |
{ |
307 | 2 | sink.figure_(); |
308 | 2 | this.inFigure = false; |
309 | |
} |
310 | |
else |
311 | |
{ |
312 | 16 | visited = false; |
313 | |
} |
314 | |
} |
315 | 166 | else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) ) |
316 | |
{ |
317 | 8 | verbatim_(); |
318 | |
|
319 | 8 | sink.verbatim_(); |
320 | |
} |
321 | 158 | else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) ) |
322 | |
{ |
323 | 2 | sink.list_(); |
324 | |
} |
325 | 156 | else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) ) |
326 | |
{ |
327 | 2 | sink.numberedList_(); |
328 | 2 | orderedListDepth--; |
329 | |
} |
330 | 154 | else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) ) |
331 | |
{ |
332 | 4 | handleListItemEnd( sink ); |
333 | |
} |
334 | 150 | else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) ) |
335 | |
{ |
336 | 2 | sink.definitionList_(); |
337 | |
} |
338 | 148 | else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) ) |
339 | |
{ |
340 | 2 | sink.definedTerm_(); |
341 | |
} |
342 | 146 | else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) ) |
343 | |
{ |
344 | 2 | sink.definition_(); |
345 | 2 | sink.definitionListItem_(); |
346 | |
} |
347 | 144 | else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) ) |
348 | |
|| ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) ) |
349 | |
{ |
350 | 12 | sink.bold_(); |
351 | |
} |
352 | 132 | else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) ) |
353 | |
|| ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) ) |
354 | |
{ |
355 | 14 | handleFigureCaptionEnd( sink ); |
356 | |
} |
357 | 118 | else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) ) |
358 | |
|| ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) ) |
359 | |
|| ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) ) |
360 | |
{ |
361 | 8 | sink.monospaced_(); |
362 | |
} |
363 | 110 | else if ( parser.getName().equals( HtmlMarkup.A.toString() ) ) |
364 | |
{ |
365 | 20 | handleAEnd( sink ); |
366 | |
} |
367 | |
|
368 | |
|
369 | |
|
370 | |
|
371 | |
|
372 | 90 | else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) ) |
373 | |
{ |
374 | 4 | sink.tableRows_(); |
375 | |
|
376 | 4 | sink.table_(); |
377 | |
} |
378 | 86 | else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) ) |
379 | |
{ |
380 | 8 | sink.tableRow_(); |
381 | |
} |
382 | 78 | else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) ) |
383 | |
{ |
384 | 4 | sink.tableHeaderCell_(); |
385 | |
} |
386 | 74 | else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) ) |
387 | |
{ |
388 | 4 | sink.tableCell_(); |
389 | |
} |
390 | 70 | else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) ) |
391 | |
{ |
392 | 2 | sink.tableCaption_(); |
393 | |
} |
394 | 68 | else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) ) |
395 | |
{ |
396 | 26 | sink.sectionTitle1_(); |
397 | |
} |
398 | 42 | else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) ) |
399 | |
{ |
400 | 10 | sink.sectionTitle2_(); |
401 | |
} |
402 | 32 | else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) ) |
403 | |
{ |
404 | 8 | sink.sectionTitle3_(); |
405 | |
} |
406 | 24 | else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) ) |
407 | |
{ |
408 | 2 | sink.sectionTitle4_(); |
409 | |
} |
410 | 22 | else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) ) |
411 | |
{ |
412 | 4 | sink.sectionTitle5_(); |
413 | |
} |
414 | 18 | else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) ) |
415 | |
{ |
416 | 2 | handleUnknown( parser, sink, TAG_TYPE_END ); |
417 | |
|
418 | 2 | scriptBlock = false; |
419 | |
} |
420 | |
else |
421 | |
{ |
422 | 16 | visited = false; |
423 | |
} |
424 | |
|
425 | 214 | return visited; |
426 | |
} |
427 | |
|
428 | |
|
429 | |
|
430 | |
|
431 | |
|
432 | |
|
433 | |
|
434 | |
protected void handleStartTag( XmlPullParser parser, Sink sink ) |
435 | |
throws XmlPullParserException, MacroExecutionException |
436 | |
{ |
437 | 214 | if ( !baseStartTag( parser, sink ) ) |
438 | |
{ |
439 | 20 | if ( getLog().isWarnEnabled() ) |
440 | |
{ |
441 | 6 | String position = "[" + parser.getLineNumber() + ":" |
442 | |
+ parser.getColumnNumber() + "]"; |
443 | 6 | String tag = "<" + parser.getName() + ">"; |
444 | |
|
445 | 6 | getLog().warn( "Unrecognized xml tag: " + tag + " at " + position ); |
446 | |
} |
447 | |
} |
448 | 214 | } |
449 | |
|
450 | |
|
451 | |
|
452 | |
|
453 | |
|
454 | |
|
455 | |
|
456 | |
protected void handleEndTag( XmlPullParser parser, Sink sink ) |
457 | |
throws XmlPullParserException, MacroExecutionException |
458 | |
{ |
459 | 214 | if ( !baseEndTag( parser, sink ) ) |
460 | |
{ |
461 | |
|
462 | |
} |
463 | 214 | } |
464 | |
|
465 | |
|
466 | |
@Override |
467 | |
protected void handleText( XmlPullParser parser, Sink sink ) |
468 | |
throws XmlPullParserException |
469 | |
{ |
470 | 78 | String text = getText( parser ); |
471 | |
|
472 | |
|
473 | |
|
474 | |
|
475 | |
|
476 | |
|
477 | |
|
478 | 78 | if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() ) |
479 | |
{ |
480 | 78 | sink.text( text, decoration ); |
481 | |
} |
482 | 78 | } |
483 | |
|
484 | |
|
485 | |
@Override |
486 | |
protected void handleComment( XmlPullParser parser, Sink sink ) |
487 | |
throws XmlPullParserException |
488 | |
{ |
489 | 4 | String text = getText( parser ).trim(); |
490 | |
|
491 | 4 | if ( "PB".equals( text ) ) |
492 | |
{ |
493 | 2 | sink.pageBreak(); |
494 | |
} |
495 | |
else |
496 | |
{ |
497 | 2 | sink.comment( text ); |
498 | |
} |
499 | 4 | } |
500 | |
|
501 | |
|
502 | |
@Override |
503 | |
protected void handleCdsect( XmlPullParser parser, Sink sink ) |
504 | |
throws XmlPullParserException |
505 | |
{ |
506 | 4 | String text = getText( parser ); |
507 | |
|
508 | 4 | if ( isScriptBlock() ) |
509 | |
{ |
510 | 0 | sink.unknown( CDATA, new Object[] {new Integer( CDATA_TYPE ), text}, null ); |
511 | |
} |
512 | |
else |
513 | |
{ |
514 | 4 | sink.text( text ); |
515 | |
} |
516 | 4 | } |
517 | |
|
518 | |
|
519 | |
|
520 | |
|
521 | |
|
522 | |
|
523 | |
|
524 | |
|
525 | |
|
526 | |
|
527 | |
|
528 | |
|
529 | |
|
530 | |
|
531 | |
|
532 | |
|
533 | |
|
534 | |
|
535 | |
|
536 | |
|
537 | |
|
538 | |
|
539 | |
|
540 | |
|
541 | |
|
542 | |
|
543 | |
|
544 | |
|
545 | |
|
546 | |
protected void consecutiveSections( int newLevel, Sink sink ) |
547 | |
{ |
548 | 50 | closeOpenSections( newLevel, sink ); |
549 | 50 | openMissingSections( newLevel, sink ); |
550 | |
|
551 | 50 | this.sectionLevel = newLevel; |
552 | 50 | } |
553 | |
|
554 | |
|
555 | |
|
556 | |
|
557 | |
|
558 | |
|
559 | |
|
560 | |
private void closeOpenSections( int newLevel, Sink sink ) |
561 | |
{ |
562 | 92 | while ( this.sectionLevel >= newLevel ) |
563 | |
{ |
564 | 42 | if ( sectionLevel == Sink.SECTION_LEVEL_5 ) |
565 | |
{ |
566 | 4 | sink.section5_(); |
567 | |
} |
568 | 38 | else if ( sectionLevel == Sink.SECTION_LEVEL_4 ) |
569 | |
{ |
570 | 4 | sink.section4_(); |
571 | |
} |
572 | 34 | else if ( sectionLevel == Sink.SECTION_LEVEL_3 ) |
573 | |
{ |
574 | 10 | sink.section3_(); |
575 | |
} |
576 | 24 | else if ( sectionLevel == Sink.SECTION_LEVEL_2 ) |
577 | |
{ |
578 | 10 | sink.section2_(); |
579 | |
} |
580 | 14 | else if ( sectionLevel == Sink.SECTION_LEVEL_1 ) |
581 | |
{ |
582 | 14 | sink.section1_(); |
583 | |
} |
584 | |
|
585 | 42 | this.sectionLevel--; |
586 | |
} |
587 | 50 | } |
588 | |
|
589 | |
|
590 | |
|
591 | |
|
592 | |
|
593 | |
|
594 | |
|
595 | |
private void openMissingSections( int newLevel, Sink sink ) |
596 | |
{ |
597 | 56 | while ( this.sectionLevel < newLevel - 1 ) |
598 | |
{ |
599 | 6 | this.sectionLevel++; |
600 | |
|
601 | 6 | if ( sectionLevel == Sink.SECTION_LEVEL_5 ) |
602 | |
{ |
603 | 0 | sink.section5(); |
604 | |
} |
605 | 6 | else if ( sectionLevel == Sink.SECTION_LEVEL_4 ) |
606 | |
{ |
607 | 2 | sink.section4(); |
608 | |
} |
609 | 4 | else if ( sectionLevel == Sink.SECTION_LEVEL_3 ) |
610 | |
{ |
611 | 2 | sink.section3(); |
612 | |
} |
613 | 2 | else if ( sectionLevel == Sink.SECTION_LEVEL_2 ) |
614 | |
{ |
615 | 2 | sink.section2(); |
616 | |
} |
617 | 0 | else if ( sectionLevel == Sink.SECTION_LEVEL_1 ) |
618 | |
{ |
619 | 0 | sink.section1(); |
620 | |
} |
621 | |
} |
622 | 50 | } |
623 | |
|
624 | |
|
625 | |
|
626 | |
|
627 | |
|
628 | |
|
629 | |
protected int getSectionLevel() |
630 | |
{ |
631 | 0 | return this.sectionLevel; |
632 | |
} |
633 | |
|
634 | |
|
635 | |
|
636 | |
|
637 | |
|
638 | |
|
639 | |
protected void setSectionLevel( int newLevel ) |
640 | |
{ |
641 | 0 | this.sectionLevel = newLevel; |
642 | 0 | } |
643 | |
|
644 | |
|
645 | |
|
646 | |
|
647 | |
protected void verbatim_() |
648 | |
{ |
649 | 8 | this.inVerbatim = false; |
650 | 8 | } |
651 | |
|
652 | |
|
653 | |
|
654 | |
|
655 | |
protected void verbatim() |
656 | |
{ |
657 | 8 | this.inVerbatim = true; |
658 | 8 | } |
659 | |
|
660 | |
|
661 | |
|
662 | |
|
663 | |
|
664 | |
|
665 | |
protected boolean isVerbatim() |
666 | |
{ |
667 | 0 | return this.inVerbatim; |
668 | |
} |
669 | |
|
670 | |
|
671 | |
|
672 | |
|
673 | |
|
674 | |
|
675 | |
|
676 | |
|
677 | |
protected boolean isScriptBlock() |
678 | |
{ |
679 | 82 | return this.scriptBlock; |
680 | |
} |
681 | |
|
682 | |
|
683 | |
|
684 | |
|
685 | |
|
686 | |
|
687 | |
|
688 | |
|
689 | |
protected String validAnchor( String id ) |
690 | |
{ |
691 | 6 | if ( !DoxiaUtils.isValidId( id ) ) |
692 | |
{ |
693 | 4 | String linkAnchor = DoxiaUtils.encodeId( id, true ); |
694 | |
|
695 | 4 | String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'"; |
696 | 4 | logMessage( "modifiedLink", msg ); |
697 | |
|
698 | 4 | return linkAnchor; |
699 | |
} |
700 | |
|
701 | 2 | return id; |
702 | |
} |
703 | |
|
704 | |
|
705 | |
@Override |
706 | |
protected void init() |
707 | |
{ |
708 | 208 | super.init(); |
709 | |
|
710 | 208 | this.scriptBlock = false; |
711 | 208 | this.isLink = false; |
712 | 208 | this.isAnchor = false; |
713 | 208 | this.orderedListDepth = 0; |
714 | 208 | this.sectionLevel = 0; |
715 | 208 | this.inVerbatim = false; |
716 | 208 | this.inFigure = false; |
717 | 208 | while ( this.decoration.getAttributeNames().hasMoreElements() ) |
718 | |
{ |
719 | 0 | this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() ); |
720 | |
} |
721 | 208 | this.warnMessages = null; |
722 | 208 | } |
723 | |
|
724 | |
private void handleAEnd( Sink sink ) |
725 | |
{ |
726 | 20 | if ( isLink ) |
727 | |
{ |
728 | 14 | sink.link_(); |
729 | 14 | isLink = false; |
730 | |
} |
731 | 6 | else if ( isAnchor ) |
732 | |
{ |
733 | 6 | sink.anchor_(); |
734 | 6 | isAnchor = false; |
735 | |
} |
736 | 20 | } |
737 | |
|
738 | |
private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs ) |
739 | |
{ |
740 | 20 | String href = parser.getAttributeValue( null, Attribute.HREF.toString() ); |
741 | |
|
742 | 20 | if ( href != null ) |
743 | |
{ |
744 | 14 | int hashIndex = href.indexOf( '#'); |
745 | 14 | if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) ) |
746 | |
{ |
747 | 2 | String hash = href.substring( hashIndex + 1 ); |
748 | |
|
749 | 2 | if ( !DoxiaUtils.isValidId( hash ) ) |
750 | |
{ |
751 | 2 | href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true ); |
752 | |
|
753 | 2 | String msg = "Modified invalid link: '" + hash + "' to '" + href + "'"; |
754 | 2 | logMessage( "modifiedLink", msg ); |
755 | |
} |
756 | |
} |
757 | 14 | sink.link( href, attribs ); |
758 | 14 | isLink = true; |
759 | 14 | } |
760 | |
else |
761 | |
{ |
762 | 6 | String name = parser.getAttributeValue( null, Attribute.NAME.toString() ); |
763 | |
|
764 | 6 | if ( name != null ) |
765 | |
{ |
766 | 4 | sink.anchor( validAnchor( name ), attribs ); |
767 | 4 | isAnchor = true; |
768 | |
} |
769 | |
else |
770 | |
{ |
771 | 2 | String id = parser.getAttributeValue( null, Attribute.ID.toString() ); |
772 | 2 | if ( id != null ) |
773 | |
{ |
774 | 2 | sink.anchor( validAnchor( id ), attribs ); |
775 | 2 | isAnchor = true; |
776 | |
} |
777 | |
} |
778 | |
} |
779 | 20 | } |
780 | |
|
781 | |
private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink ) |
782 | |
{ |
783 | 18 | boolean visited = true; |
784 | |
|
785 | 18 | String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() ); |
786 | |
|
787 | 18 | if ( "figure".equals( divclass ) ) |
788 | |
{ |
789 | 2 | this.inFigure = true; |
790 | 2 | SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs ); |
791 | 2 | atts.removeAttribute( SinkEventAttributes.CLASS ); |
792 | 2 | sink.figure( atts ); |
793 | 2 | } |
794 | |
else |
795 | |
{ |
796 | 16 | visited = false; |
797 | |
} |
798 | |
|
799 | 18 | return visited; |
800 | |
} |
801 | |
|
802 | |
private void handleFigureCaptionEnd( Sink sink ) |
803 | |
{ |
804 | 14 | if ( inFigure ) |
805 | |
{ |
806 | 2 | sink.figureCaption_(); |
807 | |
} |
808 | |
else |
809 | |
{ |
810 | 12 | sink.italic_(); |
811 | |
} |
812 | 14 | } |
813 | |
|
814 | |
private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs ) |
815 | |
{ |
816 | 14 | if ( inFigure ) |
817 | |
{ |
818 | 2 | sink.figureCaption( attribs ); |
819 | |
} |
820 | |
else |
821 | |
{ |
822 | 12 | sink.italic(); |
823 | |
} |
824 | 14 | } |
825 | |
|
826 | |
private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs ) |
827 | |
{ |
828 | 8 | String src = parser.getAttributeValue( null, Attribute.SRC.toString() ); |
829 | |
|
830 | 8 | if ( src != null ) |
831 | |
{ |
832 | 8 | sink.figureGraphics( src, attribs ); |
833 | |
} |
834 | 8 | } |
835 | |
|
836 | |
private void handleLIStart( Sink sink, SinkEventAttributeSet attribs ) |
837 | |
{ |
838 | 4 | if ( orderedListDepth == 0 ) |
839 | |
{ |
840 | 2 | sink.listItem( attribs ); |
841 | |
} |
842 | |
else |
843 | |
{ |
844 | 2 | sink.numberedListItem( attribs ); |
845 | |
} |
846 | 4 | } |
847 | |
|
848 | |
private void handleListItemEnd( Sink sink ) |
849 | |
{ |
850 | 4 | if ( orderedListDepth == 0 ) |
851 | |
{ |
852 | 2 | sink.listItem_(); |
853 | |
} |
854 | |
else |
855 | |
{ |
856 | 2 | sink.numberedListItem_(); |
857 | |
} |
858 | 4 | } |
859 | |
|
860 | |
private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs ) |
861 | |
{ |
862 | 2 | int numbering = Sink.NUMBERING_DECIMAL; |
863 | |
|
864 | 2 | String style = parser.getAttributeValue( null, Attribute.STYLE.toString() ); |
865 | |
|
866 | 2 | if ( style != null ) |
867 | |
{ |
868 | 0 | if ( "list-style-type: upper-alpha".equals( style ) ) |
869 | |
{ |
870 | 0 | numbering = Sink.NUMBERING_UPPER_ALPHA; |
871 | |
} |
872 | 0 | else if ( "list-style-type: lower-alpha".equals( style ) ) |
873 | |
{ |
874 | 0 | numbering = Sink.NUMBERING_LOWER_ALPHA; |
875 | |
} |
876 | 0 | else if ( "list-style-type: upper-roman".equals( style ) ) |
877 | |
{ |
878 | 0 | numbering = Sink.NUMBERING_UPPER_ROMAN; |
879 | |
} |
880 | 0 | else if ( "list-style-type: lower-roman".equals( style ) ) |
881 | |
{ |
882 | 0 | numbering = Sink.NUMBERING_LOWER_ROMAN; |
883 | |
} |
884 | 0 | else if ( "list-style-type: decimal".equals( style ) ) |
885 | |
{ |
886 | 0 | numbering = Sink.NUMBERING_DECIMAL; |
887 | |
} |
888 | |
} |
889 | |
|
890 | 2 | sink.numberedList( numbering, attribs ); |
891 | 2 | orderedListDepth++; |
892 | 2 | } |
893 | |
|
894 | |
private void handlePStart( Sink sink, SinkEventAttributeSet attribs ) |
895 | |
{ |
896 | 18 | if ( !inFigure ) |
897 | |
{ |
898 | 14 | sink.paragraph( attribs ); |
899 | |
} |
900 | 18 | } |
901 | |
|
902 | |
|
903 | |
|
904 | |
|
905 | |
|
906 | |
|
907 | |
|
908 | |
|
909 | |
|
910 | |
|
911 | |
|
912 | |
private void handlePreStart( SinkEventAttributeSet attribs, Sink sink ) |
913 | |
{ |
914 | 8 | verbatim(); |
915 | 8 | attribs.removeAttribute( SinkEventAttributes.DECORATION ); |
916 | 8 | sink.verbatim( attribs ); |
917 | 8 | } |
918 | |
|
919 | |
private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs ) |
920 | |
{ |
921 | 50 | consecutiveSections( level, sink ); |
922 | 50 | sink.section( level, attribs ); |
923 | 50 | sink.sectionTitle( level, attribs ); |
924 | 50 | } |
925 | |
|
926 | |
private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser ) |
927 | |
{ |
928 | 4 | sink.table( attribs ); |
929 | 4 | String border = parser.getAttributeValue( null, Attribute.BORDER.toString() ); |
930 | 4 | boolean grid = true; |
931 | |
|
932 | 4 | if ( border == null || "0".equals( border ) ) |
933 | |
{ |
934 | 4 | grid = false; |
935 | |
} |
936 | |
|
937 | 4 | String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() ); |
938 | 4 | int[] justif = {Sink.JUSTIFY_LEFT}; |
939 | |
|
940 | 4 | if ( "center".equals( align ) ) |
941 | |
{ |
942 | 2 | justif[0] = Sink.JUSTIFY_CENTER; |
943 | |
} |
944 | 2 | else if ( "right".equals( align ) ) |
945 | |
{ |
946 | 0 | justif[0] = Sink.JUSTIFY_RIGHT; |
947 | |
} |
948 | |
|
949 | 4 | sink.tableRows( justif, grid ); |
950 | 4 | } |
951 | |
|
952 | |
|
953 | |
|
954 | |
|
955 | |
|
956 | |
|
957 | |
|
958 | |
|
959 | |
|
960 | |
private void logMessage( String key, String msg ) |
961 | |
{ |
962 | 6 | final String log = "[XHTML Parser] " + msg; |
963 | 6 | if ( getLog().isDebugEnabled() ) |
964 | |
{ |
965 | 0 | getLog().debug( log ); |
966 | |
|
967 | 0 | return; |
968 | |
} |
969 | |
|
970 | 6 | if ( warnMessages == null ) |
971 | |
{ |
972 | 2 | warnMessages = new HashMap<String, Set<String>>(); |
973 | |
} |
974 | |
|
975 | 6 | Set<String> set = warnMessages.get( key ); |
976 | 6 | if ( set == null ) |
977 | |
{ |
978 | 2 | set = new TreeSet<String>(); |
979 | |
} |
980 | 6 | set.add( log ); |
981 | 6 | warnMessages.put( key, set ); |
982 | 6 | } |
983 | |
|
984 | |
|
985 | |
|
986 | |
|
987 | |
private void logWarnings() |
988 | |
{ |
989 | 52 | if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() ) |
990 | |
{ |
991 | 0 | for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() ) |
992 | |
{ |
993 | 0 | for ( String msg : entry.getValue() ) |
994 | |
{ |
995 | 0 | getLog().warn( msg ); |
996 | |
} |
997 | |
} |
998 | |
|
999 | 0 | this.warnMessages = null; |
1000 | |
} |
1001 | 52 | } |
1002 | |
} |