View Javadoc
1   package org.apache.maven.doxia.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.awt.image.BufferedImage;
23  
24  import java.io.File;
25  import java.io.IOException;
26  
27  import java.net.URL;
28  
29  import java.nio.charset.StandardCharsets;
30  import java.text.ParseException;
31  import java.text.ParsePosition;
32  import java.text.SimpleDateFormat;
33  
34  import java.util.Date;
35  import java.util.Locale;
36  
37  import javax.imageio.ImageIO;
38  
39  import javax.swing.text.MutableAttributeSet;
40  
41  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
42  
43  /**
44   * General Doxia utility methods. The methods in this class should not assume
45   * any specific Doxia module or document format.
46   *
47   * @author ltheussl
48   * @since 1.1
49   */
50  public class DoxiaUtils
51  {
52      private static final int MINUS_ONE = 0xFF;
53  
54      /**
55       * Checks if the given string corresponds to an internal link,
56       * ie it is a link to an anchor within the same document.
57       * If link is not null, then exactly one of the three methods
58       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
59       * {@link #isLocalLink(java.lang.String)} will return true.
60       *
61       * @param link The link to check. Not null.
62       * @return True if the link starts with "#".
63       *
64       * @throws NullPointerException if link is null.
65       * @see #isExternalLink(String)
66       * @see #isLocalLink(String)
67       */
68      public static boolean isInternalLink( final String link )
69      {
70          return link.startsWith( "#" );
71      }
72  
73      /**
74       * Checks if the given string corresponds to an external URI,
75       * ie is not a link within the same document nor a relative link
76       * to another document (a local link) of the same site.
77       * If link is not null, then exactly one of the three methods
78       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
79       * {@link #isLocalLink(java.lang.String)} will return true.
80       *
81       * @param link The link to check. Not null.
82       * @return True if the link (ignoring case) starts with either "http:/",
83       * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
84       * Note that Windows style separators "\" are not allowed
85       * for URIs, see  http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
86       *
87       * @throws NullPointerException if link is null.
88       *
89       * @see #isInternalLink(String)
90       * @see #isLocalLink(String)
91       */
92      public static boolean isExternalLink( final String link )
93      {
94          String text = link.toLowerCase( Locale.ENGLISH );
95  
96          return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" )
97              || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" )
98              || text.startsWith( "file:/" ) || text.contains( "://" ) );
99      }
100 
101     /**
102      * Checks if the given string corresponds to a relative link to another document
103      * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
104      * nor an {@link #isExternalLink(String) external} link.
105      * If link is not null, then exactly one of the three methods
106      * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
107      * {@link #isLocalLink(java.lang.String)} will return true.
108      *
109      * @param link The link to check. Not null.
110      * @return True if the link is neither an external nor an internal link.
111      *
112      * @throws NullPointerException if link is null.
113      *
114      * @see #isExternalLink(String)
115      * @see #isInternalLink(String)
116      */
117     public static boolean isLocalLink( final String link )
118     {
119         return ( !isExternalLink( link ) && !isInternalLink( link ) );
120     }
121 
122     /**
123      * Construct a valid Doxia id.
124      *
125      * <p>
126      *   This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}.
127      * </p>
128      *
129      * @param id The id to be encoded.
130      *      May be null in which case null is returned.
131      * @return The trimmed and encoded id, or null if id is null.
132      * @see #encodeId(java.lang.String, boolean)
133      */
134     public static String encodeId( final String id )
135     {
136         return encodeId( id, false );
137     }
138 
139     /**
140      * Construct a valid Doxia id.
141      *
142      * <p>
143      *   A valid Doxia id obeys the same constraints as an HTML ID or NAME token.
144      *   According to the <a href="http://www.w3.org/TR/html4/types.html#type-name">
145      *   HTML 4.01 specification section 6.2 SGML basic types</a>:
146      * </p>
147      * <p>
148      *   <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
149      *   followed by any number of letters, digits ([0-9]), hyphens ("-"),
150      *   underscores ("_"), colons (":"), and periods (".").</i>
151      * </p>
152      * <p>
153      *   According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0
154      *   section C.8. Fragment Identifiers</a>:
155      * </p>
156      * <p>
157      *   <i>When defining fragment identifiers to be backward-compatible, only
158      *   strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i>
159      * </p>
160      * <p>
161      *   To achieve this we need to convert the <i>id</i> String. Two conversions
162      *   are necessary and one is done to get prettier ids:
163      * </p>
164      * <ol>
165      *   <li>Remove whitespace at the start and end before starting to process</li>
166      *   <li>If the first character is not a letter, prepend the id with the letter 'a'</li>
167      *   <li>Any spaces are replaced with an underscore '_'</li>
168      *   <li>
169      *     Any characters not matching the above pattern are either dropped,
170      *     or replaced according to the rules specified in the
171      *     <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>.
172      *   </li>
173      * </ol>
174      * <p>
175      *   For letters, the case is preserved in the conversion.
176      * </p>
177      *
178      * <p>
179      * Here are some examples:
180      * </p>
181      * <pre>
182      * DoxiaUtils.encodeId( null )        = null
183      * DoxiaUtils.encodeId( "" )          = "a"
184      * DoxiaUtils.encodeId( "  " )        = "a"
185      * DoxiaUtils.encodeId( " _ " )       = "a_"
186      * DoxiaUtils.encodeId( "1" )         = "a1"
187      * DoxiaUtils.encodeId( "1anchor" )   = "a1anchor"
188      * DoxiaUtils.encodeId( "_anchor" )   = "a_anchor"
189      * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123"
190      * DoxiaUtils.encodeId( "   anchor" ) = "anchor"
191      * DoxiaUtils.encodeId( "myAnchor" )  = "myAnchor"
192      * </pre>
193      *
194      * @param id The id to be encoded.
195      *      May be null in which case null is returned.
196      * @param chop true if non-ASCII characters should be ignored.
197      * If false, any non-ASCII characters will be replaced as specified above.
198      * @return The trimmed and encoded id, or null if id is null.
199      * If id is not null, the return value is guaranteed to be a valid Doxia id.
200      * @see #isValidId(java.lang.String)
201      * @since 1.1.1
202      */
203     public static String encodeId( final String id, final boolean chop )
204     {
205         if ( id == null )
206         {
207             return null;
208         }
209 
210         final String idd = id.trim();
211         int length = idd.length();
212 
213         if ( length == 0 )
214         {
215             return "a";
216         }
217 
218         StringBuilder buffer = new StringBuilder( length );
219 
220         for ( int i = 0; i < length; ++i )
221         {
222             char c = idd.charAt( i );
223 
224             if ( ( i == 0 ) && ( !isAsciiLetter( c ) ) )
225             {
226                 buffer.append( 'a' );
227             }
228 
229             if ( c == ' ' )
230             {
231                 buffer.append( '_' );
232             }
233             else if ( isAsciiLetter( c ) || isAsciiDigit( c ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' )
234                             || ( c == '.' ) )
235             {
236                 buffer.append( c );
237             }
238             else if ( !chop )
239             {
240 
241                 byte[] bytes = String.valueOf( c ).getBytes( StandardCharsets.UTF_8 );
242 
243                 for ( byte aByte : bytes )
244                 {
245                     buffer.append( '.' );
246                     buffer.append( String.format( "%02X", aByte ) );
247                 }
248             }
249         }
250 
251         return buffer.toString();
252     }
253 
254     /**
255      * Convert a byte to it's hexadecimal equivalent.
256      *
257      * @param b the byte value.
258      * @return the result of Integer.toHexString( b &amp; 0xFF ).
259      * @since 1.1.1
260      * @deprecated Use {@code String.format( "%02X", bytes[j] )}
261      */
262     @Deprecated
263     public static String byteToHex( final byte b )
264     {
265         return Integer.toHexString( b & MINUS_ONE );
266     }
267 
268     /**
269      * Determines if the specified text is a valid id according to the rules
270      * laid out in {@link #encodeId(String)}.
271      *
272      * @param text The text to be tested.
273      *      May be null in which case false is returned.
274      * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
275      * @see #encodeId(String)
276      */
277     public static boolean isValidId( final String text )
278     {
279         if ( text == null || text.length() == 0 )
280         {
281             return false;
282         }
283 
284         for ( int i = 0; i < text.length(); ++i )
285         {
286             char c = text.charAt( i );
287 
288             if ( isAsciiLetter( c ) )
289             {
290                 continue;
291             }
292 
293             if ( ( i == 0 ) || ( c == ' ' ) || ( !isAsciiDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) )
294             {
295                 return false;
296             }
297         }
298 
299         return true;
300     }
301 
302     private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat( "", Locale.ENGLISH );
303     private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition( 0 );
304     private static final String[] DATE_PATTERNS = new String[]
305     {
306         "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
307         "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
308         "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"
309     };
310 
311     /**
312      * <p>Parses a string representing a date by trying different date patterns.</p>
313      *
314      * <p>The following date patterns are tried (in the given order):</p>
315      *
316      * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
317      *  "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
318      *  "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
319      *
320      * <p>A parse is only sucessful if it parses the whole of the input string.
321      * If no parse patterns match, a ParseException is thrown.</p>
322      *
323      * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
324      * (ignoring case) return the current date.</p>
325      *
326      * @param str the date to parse, not null.
327      * @return the parsed date, or the current date if the input String (ignoring case) was
328      *      <code>"today"</code> or <code>"now"</code>.
329      *
330      * @throws ParseException if no pattern matches.
331      * @throws NullPointerException if str is null.
332      * @since 1.1.1.
333      */
334     public static Date parseDate( final String str )
335             throws ParseException
336     {
337         if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) )
338         {
339             return new Date();
340         }
341 
342         for ( String datePattern : DATE_PATTERNS )
343         {
344             DATE_PARSER.applyPattern( datePattern );
345             DATE_PARSE_POSITION.setIndex( 0 );
346             final Date date = DATE_PARSER.parse( str, DATE_PARSE_POSITION );
347 
348             if ( date != null && DATE_PARSE_POSITION.getIndex() == str.length() )
349             {
350                 return date;
351             }
352         }
353 
354         throw new ParseException( "Unable to parse date: " + str, -1 );
355     }
356 
357       //
358      // private
359     //
360 
361     private static boolean isAsciiLetter( final char c )
362     {
363         return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) );
364     }
365 
366     private static boolean isAsciiDigit( final char c )
367     {
368         return ( c >= '0' && c <= '9' );
369     }
370 
371     /**
372      * Determine width and height of an image. If successful, the returned SinkEventAttributes
373      * contain width and height attribute keys whose values are the width and height of the image (as a String).
374      *
375      * @param logo a String containing either a URL or a path to an image file. Not null.
376      * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
377      *
378      * @throws java.io.IOException if an error occurs during reading.
379      * @throws NullPointerException if logo is null.
380      *
381      * @since 1.1.1
382      */
383     public static MutableAttributeSet getImageAttributes( final String logo )
384             throws IOException
385     {
386         BufferedImage img;
387 
388         if ( isExternalLink( logo ) )
389         {
390             img = ImageIO.read( new URL( logo ) );
391         }
392         else
393         {
394             img = ImageIO.read( new File( logo ) );
395         }
396 
397         if ( img == null )
398         {
399             return null;
400         }
401 
402         MutableAttributeSet atts = new SinkEventAttributeSet();
403         atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) );
404         atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) );
405         // add other attributes?
406 
407         return atts;
408     }
409 
    /**
     * Private constructor: this class only offers static utility methods
     * and is not meant to be instantiated.
     */
    private DoxiaUtils()
    {
        // utility class
    }
414 }