1 package org.apache.maven.doxia.util; 2 3 /* 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 */ 21 22 import java.awt.image.BufferedImage; 23 24 import java.io.File; 25 import java.io.IOException; 26 import java.io.UnsupportedEncodingException; 27 28 import java.net.URL; 29 30 import java.text.ParseException; 31 import java.text.ParsePosition; 32 import java.text.SimpleDateFormat; 33 34 import java.util.Date; 35 import java.util.Locale; 36 37 import javax.imageio.ImageIO; 38 39 import javax.swing.text.MutableAttributeSet; 40 41 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 42 43 /** 44 * General Doxia utility methods. The methods in this class should not assume 45 * any specific Doxia module or document format. 46 * 47 * @author ltheussl 48 * @since 1.1 49 * @version $Id: DoxiaUtils.java 1726411 2016-01-23 16:34:09Z hboutemy $ 50 */ 51 public class DoxiaUtils 52 { 53 private static final int MINUS_ONE = 0xFF; 54 55 /** 56 * Checks if the given string corresponds to an internal link, 57 * ie it is a link to an anchor within the same document. 58 * If link is not null, then exactly one of the three methods 59 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 60 * {@link #isLocalLink(java.lang.String)} will return true. 61 * 62 * @param link The link to check. Not null. 63 * @return True if the link starts with "#". 64 * 65 * @throws NullPointerException if link is null. 66 * 67 * @see #isExternalLink(String) 68 * @see #isLocalLink(String) 69 */ 70 public static boolean isInternalLink( final String link ) 71 { 72 return link.startsWith( "#" ); 73 } 74 75 /** 76 * Checks if the given string corresponds to an external URI, 77 * ie is not a link within the same document nor a relative link 78 * to another document (a local link) of the same site. 79 * If link is not null, then exactly one of the three methods 80 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 81 * {@link #isLocalLink(java.lang.String)} will return true. 82 * 83 * @param link The link to check. Not null. 84 * 85 * @return True if the link (ignoring case) starts with either "http:/", 86 * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://". 87 * Note that Windows style separators "\" are not allowed 88 * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3. 89 * 90 * @throws NullPointerException if link is null. 91 * 92 * @see #isInternalLink(String) 93 * @see #isLocalLink(String) 94 */ 95 public static boolean isExternalLink( final String link ) 96 { 97 String text = link.toLowerCase( Locale.ENGLISH ); 98 99 return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" ) 100 || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" ) 101 || text.startsWith( "file:/" ) || text.contains( "://" ) ); 102 } 103 104 /** 105 * Checks if the given string corresponds to a relative link to another document 106 * within the same site, ie it is neither an {@link #isInternalLink(String) internal} 107 * nor an {@link #isExternalLink(String) external} link. 108 * If link is not null, then exactly one of the three methods 109 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 110 * {@link #isLocalLink(java.lang.String)} will return true. 111 * 112 * @param link The link to check. Not null. 113 * 114 * @return True if the link is neither an external nor an internal link. 115 * 116 * @throws NullPointerException if link is null. 117 * 118 * @see #isExternalLink(String) 119 * @see #isInternalLink(String) 120 */ 121 public static boolean isLocalLink( final String link ) 122 { 123 return ( !isExternalLink( link ) && !isInternalLink( link ) ); 124 } 125 126 /** 127 * Construct a valid Doxia id. 128 * 129 * <p> 130 * This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}. 131 * </p> 132 * 133 * @param id The id to be encoded. 134 * May be null in which case null is returned. 135 * 136 * @return The trimmed and encoded id, or null if id is null. 137 * 138 * @see #encodeId(java.lang.String, boolean) 139 */ 140 public static String encodeId( final String id ) 141 { 142 return encodeId( id, false ); 143 } 144 145 /** 146 * Construct a valid Doxia id. 147 * 148 * <p> 149 * A valid Doxia id obeys the same constraints as an HTML ID or NAME token. 150 * According to the <a href="http://www.w3.org/TR/html4/types.html#type-name"> 151 * HTML 4.01 specification section 6.2 SGML basic types</a>: 152 * </p> 153 * <p> 154 * <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be 155 * followed by any number of letters, digits ([0-9]), hyphens ("-"), 156 * underscores ("_"), colons (":"), and periods (".").</i> 157 * </p> 158 * <p> 159 * According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0 160 * section C.8. Fragment Identifiers</a>: 161 * </p> 162 * <p> 163 * <i>When defining fragment identifiers to be backward-compatible, only 164 * strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i> 165 * </p> 166 * <p> 167 * To achieve this we need to convert the <i>id</i> String. Two conversions 168 * are necessary and one is done to get prettier ids: 169 * </p> 170 * <ol> 171 * <li>Remove whitespace at the start and end before starting to process</li> 172 * <li>If the first character is not a letter, prepend the id with the letter 'a'</li> 173 * <li>Any spaces are replaced with an underscore '_'</li> 174 * <li> 175 * Any characters not matching the above pattern are either dropped, 176 * or replaced according to the rules specified in the 177 * <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>. 178 * </li> 179 * </ol> 180 * <p> 181 * For letters, the case is preserved in the conversion. 182 * </p> 183 * 184 * <p> 185 * Here are some examples: 186 * </p> 187 * <pre> 188 * DoxiaUtils.encodeId( null ) = null 189 * DoxiaUtils.encodeId( "" ) = "a" 190 * DoxiaUtils.encodeId( " " ) = "a" 191 * DoxiaUtils.encodeId( " _ " ) = "a_" 192 * DoxiaUtils.encodeId( "1" ) = "a1" 193 * DoxiaUtils.encodeId( "1anchor" ) = "a1anchor" 194 * DoxiaUtils.encodeId( "_anchor" ) = "a_anchor" 195 * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123" 196 * DoxiaUtils.encodeId( " anchor" ) = "anchor" 197 * DoxiaUtils.encodeId( "myAnchor" ) = "myAnchor" 198 * </pre> 199 * 200 * @param id The id to be encoded. 201 * May be null in which case null is returned. 202 * @param chop true if non-ASCII characters should be ignored. 203 * If false, any non-ASCII characters will be replaced as specified above. 204 * 205 * @return The trimmed and encoded id, or null if id is null. 206 * If id is not null, the return value is guaranteed to be a valid Doxia id. 207 * 208 * @see #isValidId(java.lang.String) 209 * 210 * @since 1.1.1 211 */ 212 public static String encodeId( final String id, final boolean chop ) 213 { 214 if ( id == null ) 215 { 216 return null; 217 } 218 219 final String idd = id.trim(); 220 int length = idd.length(); 221 222 if ( length == 0 ) 223 { 224 return "a"; 225 } 226 227 StringBuilder buffer = new StringBuilder( length ); 228 229 for ( int i = 0; i < length; ++i ) 230 { 231 char c = idd.charAt( i ); 232 233 if ( ( i == 0 ) && ( !isAsciiLetter( c ) ) ) 234 { 235 buffer.append( 'a' ); 236 } 237 238 if ( c == ' ' ) 239 { 240 buffer.append( '_' ); 241 } 242 else if ( isAsciiLetter( c ) || isAsciiDigit( c ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' ) 243 || ( c == '.' ) ) 244 { 245 buffer.append( c ); 246 } 247 else if ( !chop ) 248 { 249 byte[] bytes; 250 251 try 252 { 253 bytes = String.valueOf( c ).getBytes( "UTF8" ); 254 } 255 catch ( UnsupportedEncodingException cannotHappen ) 256 { 257 bytes = new byte[0]; 258 } 259 260 for ( int j = 0; j < bytes.length; ++j ) 261 { 262 String hex = byteToHex( bytes[j] ); 263 264 buffer.append( '%' ); 265 266 if ( hex.length() == 1 ) 267 { 268 buffer.append( '0' ); 269 } 270 271 buffer.append( hex ); 272 } 273 } 274 } 275 276 return buffer.toString(); 277 } 278 279 /** 280 * Convert a byte to it's hexadecimal equivalent. 281 * 282 * @param b the byte value. 283 * @return the result of Integer.toHexString( b & 0xFF ). 284 * 285 * @since 1.1.1 286 */ 287 public static String byteToHex( final byte b ) 288 { 289 return Integer.toHexString( b & MINUS_ONE ); 290 } 291 292 /** 293 * Determines if the specified text is a valid id according to the rules 294 * laid out in {@link #encodeId(String)}. 295 * 296 * @param text The text to be tested. 297 * May be null in which case false is returned. 298 * 299 * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>. 300 * 301 * @see #encodeId(String) 302 */ 303 public static boolean isValidId( final String text ) 304 { 305 if ( text == null || text.length() == 0 ) 306 { 307 return false; 308 } 309 310 for ( int i = 0; i < text.length(); ++i ) 311 { 312 char c = text.charAt( i ); 313 314 if ( isAsciiLetter( c ) ) 315 { 316 continue; 317 } 318 319 if ( ( i == 0 ) || ( c == ' ' ) || ( !isAsciiDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) ) 320 { 321 return false; 322 } 323 } 324 325 return true; 326 } 327 328 private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat( "", Locale.ENGLISH ); 329 private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition( 0 ); 330 private static final String[] DATE_PATTERNS = new String[] 331 { 332 "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy", 333 "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy", 334 "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy" 335 }; 336 337 /** 338 * <p>Parses a string representing a date by trying different date patterns.</p> 339 * 340 * <p>The following date patterns are tried (in the given order):</p> 341 * 342 * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy", 343 * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy", 344 * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre> 345 * 346 * <p>A parse is only sucessful if it parses the whole of the input string. 347 * If no parse patterns match, a ParseException is thrown.</p> 348 * 349 * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code> 350 * (ignoring case) return the current date.</p> 351 * 352 * @param str the date to parse, not null. 353 * 354 * @return the parsed date, or the current date if the input String (ignoring case) was 355 * <code>"today"</code> or <code>"now"</code>. 356 * 357 * @throws ParseException if no pattern matches. 358 * @throws NullPointerException if str is null. 359 * 360 * @since 1.1.1. 361 */ 362 public static Date parseDate( final String str ) 363 throws ParseException 364 { 365 if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) ) 366 { 367 return new Date(); 368 } 369 370 for ( int i = 0; i < DATE_PATTERNS.length; i++ ) 371 { 372 DATE_PARSER.applyPattern( DATE_PATTERNS[i] ); 373 DATE_PARSE_POSITION.setIndex( 0 ); 374 final Date date = DATE_PARSER.parse( str, DATE_PARSE_POSITION ); 375 376 if ( date != null && DATE_PARSE_POSITION.getIndex() == str.length() ) 377 { 378 return date; 379 } 380 } 381 382 throw new ParseException( "Unable to parse date: " + str, -1 ); 383 } 384 385 // 386 // private 387 // 388 389 private static boolean isAsciiLetter( final char c ) 390 { 391 return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) ); 392 } 393 394 private static boolean isAsciiDigit( final char c ) 395 { 396 return ( c >= '0' && c <= '9' ); 397 } 398 399 /** 400 * Determine width and height of an image. If successful, the returned SinkEventAttributes 401 * contain width and height attribute keys whose values are the width and height of the image (as a String). 402 * 403 * @param logo a String containing either a URL or a path to an image file. Not null. 404 * 405 * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image. 406 * 407 * @throws java.io.IOException if an error occurs during reading. 408 * @throws NullPointerException if logo is null. 409 * 410 * @since 1.1.1 411 */ 412 public static MutableAttributeSet getImageAttributes( final String logo ) 413 throws IOException 414 { 415 BufferedImage img = null; 416 417 if ( isExternalLink( logo ) ) 418 { 419 img = ImageIO.read( new URL( logo ) ); 420 } 421 else 422 { 423 img = ImageIO.read( new File( logo ) ); 424 } 425 426 if ( img == null ) 427 { 428 return null; 429 } 430 431 MutableAttributeSet atts = new SinkEventAttributeSet(); 432 atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) ); 433 atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) ); 434 // add other attributes? 435 436 return atts; 437 } 438 439 private DoxiaUtils() 440 { 441 // utility class 442 } 443 }