package org.apache.maven.doxia.util;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.awt.image.BufferedImage;

import java.io.File;
import java.io.IOException;

import java.net.URL;

import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;

import java.util.Date;
import java.util.Locale;

import javax.imageio.ImageIO;

import javax.swing.text.MutableAttributeSet;

import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;

/**
 * General Doxia utility methods. The methods in this class should not assume
 * any specific Doxia module or document format.
 *
 * <p>
 * The class keeps no mutable static state: every method works only on its arguments
 * and on objects it creates itself.
 * </p>
 *
 * @author ltheussl
 * @since 1.1
 */
public class DoxiaUtils
{
    /** Mask that converts a (possibly negative) byte into its unsigned value 0..255. */
    private static final int BYTE_MASK = 0xFF;

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /** The date patterns tried by {@link #parseDate(String)}, in this order. */
    private static final String[] DATE_PATTERNS = new String[]
    {
        "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
        "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
        "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"
    };

    /**
     * Checks if the given string corresponds to an internal link,
     * ie it is a link to an anchor within the same document.
     * If link is not null, then exactly one of the three methods
     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
     * {@link #isLocalLink(java.lang.String)} will return true.
     *
     * @param link The link to check. Not null.
     * @return True if the link starts with "#".
     *
     * @throws NullPointerException if link is null.
     * @see #isExternalLink(String)
     * @see #isLocalLink(String)
     */
    public static boolean isInternalLink( final String link )
    {
        return link.startsWith( "#" );
    }

    /**
     * Checks if the given string corresponds to an external URI,
     * ie is not a link within the same document nor a relative link
     * to another document (a local link) of the same site.
     * If link is not null, then exactly one of the three methods
     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
     * {@link #isLocalLink(java.lang.String)} will return true.
     *
     * @param link The link to check. Not null.
     * @return True if the link (ignoring case) starts with either "http:/",
     * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
     * Note that Windows style separators "\" are not allowed
     * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
     *
     * @throws NullPointerException if link is null.
     *
     * @see #isInternalLink(String)
     * @see #isLocalLink(String)
     */
    public static boolean isExternalLink( final String link )
    {
        // A fixed locale keeps the lower-casing independent of the platform default locale.
        final String text = link.toLowerCase( Locale.ENGLISH );

        return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" )
            || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" )
            || text.startsWith( "file:/" ) || text.contains( "://" ) );
    }

    /**
     * Checks if the given string corresponds to a relative link to another document
     * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
     * nor an {@link #isExternalLink(String) external} link.
     * If link is not null, then exactly one of the three methods
     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
     * {@link #isLocalLink(java.lang.String)} will return true.
     *
     * @param link The link to check. Not null.
     * @return True if the link is neither an external nor an internal link.
     *
     * @throws NullPointerException if link is null.
     *
     * @see #isExternalLink(String)
     * @see #isInternalLink(String)
     */
    public static boolean isLocalLink( final String link )
    {
        return ( !isExternalLink( link ) && !isInternalLink( link ) );
    }

    /**
     * Construct a valid Doxia id.
     *
     * <p>
     * This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}.
     * </p>
     *
     * @param id The id to be encoded.
     *      May be null in which case null is returned.
     * @return The trimmed and encoded id, or null if id is null.
     * @see #encodeId(java.lang.String, boolean)
     */
    public static String encodeId( final String id )
    {
        return encodeId( id, false );
    }

    /**
     * Construct a valid Doxia id.
     *
     * <p>
     * A valid Doxia id obeys the same constraints as an HTML ID or NAME token.
     * According to the <a href="http://www.w3.org/TR/html4/types.html#type-name">
     * HTML 4.01 specification section 6.2 SGML basic types</a>:
     * </p>
     * <p>
     * <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
     * followed by any number of letters, digits ([0-9]), hyphens ("-"),
     * underscores ("_"), colons (":"), and periods (".").</i>
     * </p>
     * <p>
     * According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0
     * section C.8. Fragment Identifiers</a>:
     * </p>
     * <p>
     * <i>When defining fragment identifiers to be backward-compatible, only
     * strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i>
     * </p>
     * <p>
     * To achieve this we need to convert the <i>id</i> String. Two conversions
     * are necessary and one is done to get prettier ids:
     * </p>
     * <ol>
     *   <li>Remove whitespace at the start and end before starting to process</li>
     *   <li>If the first character is not a letter, prepend the id with the letter 'a'</li>
     *   <li>Any spaces are replaced with an underscore '_'</li>
     *   <li>
     *     Any characters not matching the above pattern are either dropped,
     *     or replaced according to the rules specified in the
     *     <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>:
     *     each byte of the character's UTF-8 encoding is written as a period
     *     followed by two upper-case hexadecimal digits.
     *   </li>
     * </ol>
     * <p>
     * For letters, the case is preserved in the conversion.
     * </p>
     * <p>
     * The input is processed code point by code point, so a character outside the
     * Basic Multilingual Plane (stored as a surrogate pair) is replaced by the four
     * bytes of its UTF-8 encoding rather than by two replacement bytes.
     * </p>
     *
     * <p>
     * Here are some examples:
     * </p>
     * <pre>
     * DoxiaUtils.encodeId( null )        = null
     * DoxiaUtils.encodeId( "" )          = "a"
     * DoxiaUtils.encodeId( "  " )        = "a"
     * DoxiaUtils.encodeId( " _ " )       = "a_"
     * DoxiaUtils.encodeId( "1" )         = "a1"
     * DoxiaUtils.encodeId( "1anchor" )   = "a1anchor"
     * DoxiaUtils.encodeId( "_anchor" )   = "a_anchor"
     * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123"
     * DoxiaUtils.encodeId( "   anchor" ) = "anchor"
     * DoxiaUtils.encodeId( "myAnchor" )  = "myAnchor"
     * </pre>
     *
     * @param id The id to be encoded.
     *      May be null in which case null is returned.
     * @param chop true if non-ASCII characters should be ignored.
     *      If false, any non-ASCII characters will be replaced as specified above.
     * @return The trimmed and encoded id, or null if id is null.
     *      If id is not null, the return value is guaranteed to be a valid Doxia id.
     * @see #isValidId(java.lang.String)
     * @since 1.1.1
     */
    public static String encodeId( final String id, final boolean chop )
    {
        if ( id == null )
        {
            return null;
        }

        final String trimmed = id.trim();
        final int length = trimmed.length();

        if ( length == 0 )
        {
            return "a";
        }

        final StringBuilder buffer = new StringBuilder( length );

        // An id must start with an ASCII letter; anything else gets an 'a' prefix.
        if ( !isAsciiLetter( trimmed.charAt( 0 ) ) )
        {
            buffer.append( 'a' );
        }

        // Walk code points, not chars, so that a surrogate pair is encoded as one character.
        int index = 0;
        while ( index < length )
        {
            final int codePoint = trimmed.codePointAt( index );
            index += Character.charCount( codePoint );

            if ( codePoint == ' ' )
            {
                buffer.append( '_' );
            }
            else if ( isIdCharacter( codePoint ) )
            {
                buffer.append( (char) codePoint );
            }
            else if ( !chop )
            {
                appendUtf8Hex( buffer, codePoint );
            }
            // else: chop is set, the character is silently dropped
        }

        return buffer.toString();
    }

    /**
     * Convert a byte to its hexadecimal equivalent.
     * The result is lower-case and is not zero-padded.
     *
     * @param b the byte value.
     * @return the result of Integer.toHexString( b & 0xFF ).
     * @since 1.1.1
     * @deprecated Use {@code String.format( "%02X", bytes[j] )}
     */
    @Deprecated
    public static String byteToHex( final byte b )
    {
        return Integer.toHexString( b & BYTE_MASK );
    }

    /**
     * Determines if the specified text is a valid id according to the rules
     * laid out in {@link #encodeId(String)}: it must start with an ASCII letter
     * and may continue with ASCII letters, digits, '-', '_', ':' and '.'.
     *
     * @param text The text to be tested.
     *      May be null in which case false is returned.
     * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
     * @see #encodeId(String)
     */
    public static boolean isValidId( final String text )
    {
        if ( text == null || text.isEmpty() )
        {
            return false;
        }

        if ( !isAsciiLetter( text.charAt( 0 ) ) )
        {
            return false;
        }

        for ( int i = 1; i < text.length(); ++i )
        {
            if ( !isIdCharacter( text.charAt( i ) ) )
            {
                return false;
            }
        }

        return true;
    }

    /**
     * <p>Parses a string representing a date by trying different date patterns.</p>
     *
     * <p>The following date patterns are tried (in the given order):</p>
     *
     * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
     * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
     * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
     *
     * <p>A parse is only successful if it parses the whole of the input string.
     * If no parse patterns match, a ParseException is thrown.</p>
     *
     * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
     * (ignoring case) return the current date.</p>
     *
     * <p>Month names are interpreted using {@link Locale#ENGLISH}. The formatter is created
     * per call, so this method may be called from several threads at once.</p>
     *
     * @param str the date to parse, not null.
     * @return the parsed date, or the current date if the input String (ignoring case) was
     *      <code>"today"</code> or <code>"now"</code>.
     *
     * @throws ParseException if no pattern matches.
     * @throws NullPointerException if str is null.
     * @since 1.1.1
     */
    public static Date parseDate( final String str )
        throws ParseException
    {
        if ( str == null )
        {
            throw new NullPointerException( "str" );
        }

        if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) )
        {
            return new Date();
        }

        // SimpleDateFormat is not thread-safe, so it must not be shared between callers.
        final SimpleDateFormat parser = new SimpleDateFormat( "", Locale.ENGLISH );

        for ( String datePattern : DATE_PATTERNS )
        {
            parser.applyPattern( datePattern );

            final ParsePosition position = new ParsePosition( 0 );
            final Date date = parser.parse( str, position );

            // Accept the result only when the pattern consumed the complete input.
            if ( date != null && position.getIndex() == str.length() )
            {
                return date;
            }
        }

        throw new ParseException( "Unable to parse date: " + str, -1 );
    }

    /**
     * Determine width and height of an image. If successful, the returned SinkEventAttributes
     * contain width and height attribute keys whose values are the width and height of the image (as a String).
     *
     * @param logo a String containing either a URL or a path to an image file. Not null.
     *      It is read as a URL if {@link #isExternalLink(String)} accepts it, otherwise as a file path.
     * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
     *
     * @throws java.io.IOException if an error occurs during reading.
     * @throws NullPointerException if logo is null.
     *
     * @since 1.1.1
     */
    public static MutableAttributeSet getImageAttributes( final String logo )
        throws IOException
    {
        final BufferedImage img;

        if ( isExternalLink( logo ) )
        {
            img = ImageIO.read( new URL( logo ) );
        }
        else
        {
            img = ImageIO.read( new File( logo ) );
        }

        // ImageIO.read returns null when no registered ImageReader can decode the input.
        if ( img == null )
        {
            return null;
        }

        final MutableAttributeSet atts = new SinkEventAttributeSet();
        atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) );
        atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) );
        // add other attributes?

        return atts;
    }

    //
    // private
    //

    /** Tells whether c is one of [A-Za-z]. */
    private static boolean isAsciiLetter( final char c )
    {
        return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) );
    }

    /** Tells whether c is one of [0-9]. */
    private static boolean isAsciiDigit( final char c )
    {
        return ( c >= '0' && c <= '9' );
    }

    /** Tells whether the code point may appear after the first character of an id: [A-Za-z0-9:_.-]. */
    private static boolean isIdCharacter( final int codePoint )
    {
        if ( codePoint > 0x7F )
        {
            return false;
        }

        final char c = (char) codePoint;

        return isAsciiLetter( c ) || isAsciiDigit( c ) || c == '-' || c == '_' || c == ':' || c == '.';
    }

    /** Appends ".XX" (upper-case hex) for every byte of the UTF-8 encoding of the code point. */
    private static void appendUtf8Hex( final StringBuilder buffer, final int codePoint )
    {
        final byte[] bytes = new String( Character.toChars( codePoint ) ).getBytes( StandardCharsets.UTF_8 );

        for ( byte b : bytes )
        {
            buffer.append( '.' );
            buffer.append( HEX_DIGITS[( b >> 4 ) & 0x0F] );
            buffer.append( HEX_DIGITS[b & 0x0F] );
        }
    }

    private DoxiaUtils()
    {
        // utility class
    }
}