1 | |
package org.apache.maven.doxia.util; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.UnsupportedEncodingException; |
23 | |
import java.util.ArrayList; |
24 | |
import java.util.HashMap; |
25 | |
import java.util.List; |
26 | |
import java.util.Map; |
27 | |
|
28 | |
import javax.swing.text.html.HTML.Tag; |
29 | |
|
30 | |
import org.apache.commons.lang.StringEscapeUtils; |
31 | |
import org.apache.maven.doxia.markup.HtmlMarkup; |
32 | |
import org.codehaus.plexus.util.StringUtils; |
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
public class HtmlTools |
42 | |
{ |
43 | 2 | private static final Tag[] ALL_TAGS = |
44 | |
{ |
45 | |
HtmlMarkup.A, HtmlMarkup.ABBR, HtmlMarkup.ACRONYM, HtmlMarkup.ADDRESS, HtmlMarkup.APPLET, |
46 | |
HtmlMarkup.AREA, HtmlMarkup.B, HtmlMarkup.BASE, HtmlMarkup.BASEFONT, HtmlMarkup.BDO, |
47 | |
HtmlMarkup.BIG, HtmlMarkup.BLOCKQUOTE, HtmlMarkup.BODY, HtmlMarkup.BR, HtmlMarkup.BUTTON, |
48 | |
HtmlMarkup.CAPTION, HtmlMarkup.CENTER, HtmlMarkup.CITE, HtmlMarkup.CODE, HtmlMarkup.COL, |
49 | |
HtmlMarkup.COLGROUP, HtmlMarkup.DD, HtmlMarkup.DEL, HtmlMarkup.DFN, HtmlMarkup.DIR, |
50 | |
HtmlMarkup.DIV, HtmlMarkup.DL, HtmlMarkup.DT, HtmlMarkup.EM, HtmlMarkup.FIELDSET, |
51 | |
HtmlMarkup.FONT, HtmlMarkup.FORM, HtmlMarkup.FRAME, HtmlMarkup.FRAMESET, HtmlMarkup.H1, |
52 | |
HtmlMarkup.H2, HtmlMarkup.H3, HtmlMarkup.H4, HtmlMarkup.H5, HtmlMarkup.H6, HtmlMarkup.HEAD, |
53 | |
HtmlMarkup.HR, HtmlMarkup.HTML, HtmlMarkup.I, HtmlMarkup.IFRAME, HtmlMarkup.IMG, |
54 | |
HtmlMarkup.INPUT, HtmlMarkup.INS, HtmlMarkup.ISINDEX, HtmlMarkup.KBD, HtmlMarkup.LABEL, |
55 | |
HtmlMarkup.LEGEND, HtmlMarkup.LI, HtmlMarkup.LINK, HtmlMarkup.MAP, HtmlMarkup.MENU, |
56 | |
HtmlMarkup.META, HtmlMarkup.NOFRAMES, HtmlMarkup.NOSCRIPT, HtmlMarkup.OBJECT, HtmlMarkup.OL, |
57 | |
HtmlMarkup.OPTGROUP, HtmlMarkup.OPTION, HtmlMarkup.P, HtmlMarkup.PARAM, HtmlMarkup.PRE, |
58 | |
HtmlMarkup.Q, HtmlMarkup.S, HtmlMarkup.SAMP, HtmlMarkup.SCRIPT, HtmlMarkup.SELECT, |
59 | |
HtmlMarkup.SMALL, HtmlMarkup.SPAN, HtmlMarkup.STRIKE, HtmlMarkup.STRONG, HtmlMarkup.STYLE, |
60 | |
HtmlMarkup.SUB, HtmlMarkup.SUP, HtmlMarkup.TABLE, HtmlMarkup.TBODY, HtmlMarkup.TD, |
61 | |
HtmlMarkup.TEXTAREA, HtmlMarkup.TFOOT, HtmlMarkup.TH, HtmlMarkup.THEAD, HtmlMarkup.TITLE, |
62 | |
HtmlMarkup.TR, HtmlMarkup.TT, HtmlMarkup.U, HtmlMarkup.UL, HtmlMarkup.VAR |
63 | |
}; |
64 | |
|
/** Lookup table from a tag's {@code toString()} value to the tag itself; filled once by the static initializer. */
private static final Map<String, Tag> TAG_MAP = new HashMap<String, Tag>( ALL_TAGS.length );

/** Highest character value ({@code '~'}) that {@code escapeHTML} leaves unescaped in non-XML mode. */
private static final int ASCII = 0x7E;

static
{
    // Index every known tag by its string form so getHtmlTag() is a single map lookup.
    for ( Tag tag : ALL_TAGS )
    {
        TAG_MAP.put( tag.toString(), tag );
    }
}
76 | |
|
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
|
84 | |
|
85 | |
|
86 | |
|
87 | |
|
88 | |
|
89 | |
public static Tag getHtmlTag( String tagName ) |
90 | |
{ |
91 | 12 | Object t = TAG_MAP.get( tagName ); |
92 | |
|
93 | 12 | return (Tag) t; |
94 | |
} |
95 | |
|
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | |
|
105 | |
public static String escapeHTML( String text ) |
106 | |
{ |
107 | 32 | return escapeHTML( text, true ); |
108 | |
} |
109 | |
|
110 | |
|
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
|
123 | |
|
124 | |
|
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | |
|
138 | |
public static String escapeHTML( final String text, final boolean xmlMode ) |
139 | |
{ |
140 | 102 | if ( text == null ) |
141 | |
{ |
142 | 2 | return ""; |
143 | |
} |
144 | |
|
145 | 100 | int length = text.length(); |
146 | 100 | StringBuilder buffer = new StringBuilder( length ); |
147 | |
|
148 | 872 | for ( int i = 0; i < length; ++i ) |
149 | |
{ |
150 | 772 | char c = text.charAt( i ); |
151 | 772 | switch ( c ) |
152 | |
{ |
153 | |
case '<': |
154 | 2 | buffer.append( "<" ); |
155 | 2 | break; |
156 | |
case '>': |
157 | 2 | buffer.append( ">" ); |
158 | 2 | break; |
159 | |
case '&': |
160 | 14 | buffer.append( "&" ); |
161 | 14 | break; |
162 | |
case '\"': |
163 | 2 | buffer.append( """ ); |
164 | 2 | break; |
165 | |
default: |
166 | 752 | if ( xmlMode ) |
167 | |
{ |
168 | 70 | if ( c == '\'' ) |
169 | |
{ |
170 | 2 | buffer.append( "'" ); |
171 | |
} |
172 | |
else |
173 | |
{ |
174 | 68 | buffer.append( c ); |
175 | |
} |
176 | |
} |
177 | |
else |
178 | |
{ |
179 | 682 | if ( c <= ASCII ) |
180 | |
{ |
181 | |
|
182 | 670 | buffer.append( c ); |
183 | |
} |
184 | |
else |
185 | |
{ |
186 | 12 | buffer.append( "&#x" ); |
187 | 12 | if ( isHighSurrogate( c ) ) |
188 | |
{ |
189 | 2 | buffer.append( Integer.toHexString( toCodePoint( c, text.charAt( ++i ) ) ) ); |
190 | |
} |
191 | |
else |
192 | |
{ |
193 | 10 | buffer.append( Integer.toHexString( c ) ); |
194 | |
} |
195 | 12 | buffer.append( ';' ); |
196 | |
} |
197 | |
} |
198 | |
} |
199 | |
} |
200 | |
|
201 | 100 | return buffer.toString(); |
202 | |
} |
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | |
|
208 | |
|
209 | |
|
210 | |
|
211 | |
|
212 | |
public static String unescapeHTML( String text ) |
213 | |
{ |
214 | 70 | return unescapeHTML( text, false ); |
215 | |
} |
216 | |
|
217 | |
|
218 | |
|
219 | |
|
220 | |
|
221 | |
|
222 | |
|
223 | |
|
224 | |
|
225 | |
|
226 | |
|
227 | |
|
228 | |
|
229 | |
|
230 | |
|
231 | |
|
232 | |
|
233 | |
|
234 | |
|
235 | |
|
236 | |
|
237 | |
|
238 | |
public static String unescapeHTML( String text, boolean xmlMode ) |
239 | |
{ |
240 | 72 | if ( text == null ) |
241 | |
{ |
242 | 2 | return null; |
243 | |
} |
244 | |
|
245 | |
String unescaped; |
246 | 70 | if ( xmlMode ) |
247 | |
{ |
248 | 2 | unescaped = StringEscapeUtils.unescapeXml( text ); |
249 | |
} |
250 | |
else |
251 | |
{ |
252 | |
|
253 | 68 | unescaped = StringEscapeUtils.unescapeHtml( text ); |
254 | |
} |
255 | |
|
256 | 70 | String tmp = unescaped; |
257 | 70 | List<String> entities = new ArrayList<String>(); |
258 | |
while ( true ) |
259 | |
{ |
260 | 96 | int i = tmp.indexOf( "&#x" ); |
261 | 96 | if ( i == -1 ) |
262 | |
{ |
263 | 70 | break; |
264 | |
} |
265 | |
|
266 | 26 | tmp = tmp.substring( i + 3 ); |
267 | 26 | if ( tmp.indexOf( ';' ) != -1 ) |
268 | |
{ |
269 | 22 | String entity = tmp.substring( 0, tmp.indexOf( ';' ) ); |
270 | |
try |
271 | |
{ |
272 | 22 | Integer.parseInt( entity, 16 ); |
273 | 18 | entities.add( entity ); |
274 | |
} |
275 | 4 | catch ( NumberFormatException e ) |
276 | |
{ |
277 | |
|
278 | 18 | } |
279 | |
} |
280 | 26 | } |
281 | |
|
282 | 70 | for ( String entity : entities ) |
283 | |
{ |
284 | 18 | int codePoint = Integer.parseInt( entity, 16 ); |
285 | 18 | unescaped = StringUtils.replace( unescaped, "&#x" + entity + ";", new String( toChars( codePoint ) ) ); |
286 | 18 | } |
287 | |
|
288 | 70 | return unescaped; |
289 | |
} |
290 | |
|
291 | |
|
292 | |
|
293 | |
|
294 | |
|
295 | |
|
296 | |
|
297 | |
public static String encodeURL( String url ) |
298 | |
{ |
299 | 10 | if ( url == null ) |
300 | |
{ |
301 | 2 | return null; |
302 | |
} |
303 | |
|
304 | 8 | StringBuilder encoded = new StringBuilder(); |
305 | 8 | int length = url.length(); |
306 | |
|
307 | 8 | char[] unicode = new char[1]; |
308 | |
|
309 | 210 | for ( int i = 0; i < length; ++i ) |
310 | |
{ |
311 | 202 | char c = url.charAt( i ); |
312 | |
|
313 | 202 | switch ( c ) |
314 | |
{ |
315 | |
case ';': |
316 | |
case '/': |
317 | |
case '?': |
318 | |
case ':': |
319 | |
case '@': |
320 | |
case '&': |
321 | |
case '=': |
322 | |
case '+': |
323 | |
case '$': |
324 | |
case ',': |
325 | |
case '[': |
326 | |
case ']': |
327 | |
case '-': |
328 | |
case '_': |
329 | |
case '.': |
330 | |
case '!': |
331 | |
case '~': |
332 | |
case '*': |
333 | |
case '\'': |
334 | |
case '(': |
335 | |
case ')': |
336 | |
case '#': |
337 | 34 | encoded.append( c ); |
338 | 34 | break; |
339 | |
default: |
340 | 168 | if ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) || ( c >= '0' && c <= '9' ) ) |
341 | |
{ |
342 | 146 | encoded.append( c ); |
343 | |
} |
344 | |
else |
345 | |
{ |
346 | |
byte[] bytes; |
347 | |
|
348 | |
try |
349 | |
{ |
350 | 22 | if ( isHighSurrogate( c ) ) |
351 | |
{ |
352 | 2 | int codePoint = toCodePoint( c, url.charAt( ++i ) ); |
353 | 2 | unicode = toChars( codePoint ); |
354 | 2 | bytes = ( new String( unicode, 0, unicode.length ) ).getBytes( "UTF8" ); |
355 | 2 | } |
356 | |
else |
357 | |
{ |
358 | 20 | unicode[0] = c; |
359 | 20 | bytes = ( new String( unicode, 0, 1 ) ).getBytes( "UTF8" ); |
360 | |
} |
361 | |
} |
362 | 0 | catch ( UnsupportedEncodingException cannotHappen ) |
363 | |
{ |
364 | 0 | bytes = new byte[0]; |
365 | 22 | } |
366 | |
|
367 | 50 | for ( int j = 0; j < bytes.length; ++j ) |
368 | |
{ |
369 | 28 | String hex = DoxiaUtils.byteToHex( bytes[j] ); |
370 | |
|
371 | 28 | encoded.append( '%' ); |
372 | 28 | if ( hex.length() == 1 ) |
373 | |
{ |
374 | 0 | encoded.append( '0' ); |
375 | |
} |
376 | 28 | encoded.append( hex ); |
377 | |
} |
378 | |
} |
379 | |
} |
380 | |
} |
381 | |
|
382 | 8 | return encoded.toString(); |
383 | |
} |
384 | |
|
385 | |
|
386 | |
|
387 | |
|
388 | |
|
389 | |
|
390 | |
|
391 | |
|
392 | |
|
393 | |
|
394 | |
|
395 | |
|
396 | |
|
397 | |
|
398 | |
public static String encodeId( String id ) |
399 | |
{ |
400 | 84 | return DoxiaUtils.encodeId( id, true ); |
401 | |
} |
402 | |
|
403 | |
|
404 | |
|
405 | |
|
406 | |
|
407 | |
|
408 | |
|
409 | |
|
410 | |
|
411 | |
public static boolean isId( String text ) |
412 | |
{ |
413 | 30 | return DoxiaUtils.isValidId( text ); |
414 | |
} |
415 | |
|
416 | |
/** Private because this class only offers static members and is not meant to be instantiated. */
private HtmlTools()
{
    // utility class
}
420 | |
|
421 | |
|
422 | |
|
423 | |
|
424 | |
|
425 | |
|
426 | |
private static final char LUNATE_SIGMA = 0x3FF; |
427 | |
private static final char NON_PRIVATE_USE_HIGH_SURROGATE = 0xD800; |
428 | |
private static final char LOW_SURROGATE = 0xDC00; |
429 | |
|
430 | |
private static int toCodePoint( char high, char low ) |
431 | |
{ |
432 | |
|
433 | |
|
434 | 4 | int h = ( high & LUNATE_SIGMA ) << 10; |
435 | 4 | int l = low & LUNATE_SIGMA; |
436 | 4 | return ( h | l ) + MIN_SUPPLEMENTARY_CODE_POINT; |
437 | |
} |
438 | |
|
439 | |
private static final char MIN_HIGH_SURROGATE = '\uD800'; |
440 | |
private static final char MAX_HIGH_SURROGATE = '\uDBFF'; |
441 | |
|
442 | |
private static boolean isHighSurrogate( char ch ) |
443 | |
{ |
444 | 34 | return ( MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch ); |
445 | |
} |
446 | |
|
447 | |
private static final int MIN_CODE_POINT = 0x000000; |
448 | |
private static final int MAX_CODE_POINT = 0x10FFFF; |
449 | |
private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; |
450 | |
|
451 | |
private static boolean isValidCodePoint( int codePoint ) |
452 | |
{ |
453 | 20 | return ( MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint ); |
454 | |
} |
455 | |
|
456 | |
private static boolean isSupplementaryCodePoint( int codePoint ) |
457 | |
{ |
458 | 20 | return ( MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint ); |
459 | |
} |
460 | |
|
461 | |
|
462 | |
|
463 | |
|
464 | |
|
465 | |
|
466 | |
|
467 | |
|
468 | |
public static char[] toChars( int codePoint ) |
469 | |
{ |
470 | 20 | if ( !isValidCodePoint( codePoint ) ) |
471 | |
{ |
472 | 0 | throw new IllegalArgumentException(); |
473 | |
} |
474 | |
|
475 | 20 | if ( isSupplementaryCodePoint( codePoint ) ) |
476 | |
{ |
477 | 20 | int cpPrime = codePoint - MIN_SUPPLEMENTARY_CODE_POINT; |
478 | 20 | int high = NON_PRIVATE_USE_HIGH_SURROGATE | ( ( cpPrime >> 10 ) & LUNATE_SIGMA ); |
479 | 20 | int low = LOW_SURROGATE | ( cpPrime & LUNATE_SIGMA ); |
480 | 20 | return new char[] { (char) high, (char) low }; |
481 | |
} |
482 | 0 | return new char[] { (char) codePoint }; |
483 | |
} |
484 | |
} |