001package org.apache.maven.doxia.util;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.awt.image.BufferedImage;
023
024import java.io.File;
025import java.io.IOException;
026import java.io.UnsupportedEncodingException;
027
028import java.net.URL;
029
030import java.text.ParseException;
031import java.text.ParsePosition;
032import java.text.SimpleDateFormat;
033
034import java.util.Date;
035import java.util.Locale;
036
037import javax.imageio.ImageIO;
038
039import javax.swing.text.MutableAttributeSet;
040
041import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
042
043/**
044 * General Doxia utility methods. The methods in this class should not assume
045 * any specific Doxia module or document format.
046 *
047 * @author ltheussl
048 * @since 1.1
049 * @version $Id$
050 */
051public class DoxiaUtils
052{
053    private static final int MINUS_ONE = 0xFF;
054
055    /**
056     * Checks if the given string corresponds to an internal link,
057     * ie it is a link to an anchor within the same document.
058     * If link is not null, then exactly one of the three methods
059     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
060     * {@link #isLocalLink(java.lang.String)} will return true.
061     *
062     * @param link The link to check. Not null.
063     * @return True if the link starts with "#".
064     *
065     * @throws NullPointerException if link is null.
066     *
067     * @see #isExternalLink(String)
068     * @see #isLocalLink(String)
069     */
070    public static boolean isInternalLink( final String link )
071    {
072        return link.startsWith( "#" );
073    }
074
075    /**
076     * Checks if the given string corresponds to an external URI,
077     * ie is not a link within the same document nor a relative link
078     * to another document (a local link) of the same site.
079     * If link is not null, then exactly one of the three methods
080     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
081     * {@link #isLocalLink(java.lang.String)} will return true.
082     *
083     * @param link The link to check. Not null.
084     *
085     * @return True if the link (ignoring case) starts with either "http:/",
086     * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
087     * Note that Windows style separators "\" are not allowed
088     * for URIs, see  http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
089     *
090     * @throws NullPointerException if link is null.
091     *
092     * @see #isInternalLink(String)
093     * @see #isLocalLink(String)
094     */
095    public static boolean isExternalLink( final String link )
096    {
097        String text = link.toLowerCase( Locale.ENGLISH );
098
099        return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" )
100            || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" )
101            || text.startsWith( "file:/" ) || text.contains( "://" ) );
102    }
103
104    /**
105     * Checks if the given string corresponds to a relative link to another document
106     * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
107     * nor an {@link #isExternalLink(String) external} link.
108     * If link is not null, then exactly one of the three methods
109     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
110     * {@link #isLocalLink(java.lang.String)} will return true.
111     *
112     * @param link The link to check. Not null.
113     *
114     * @return True if the link is neither an external nor an internal link.
115     *
116     * @throws NullPointerException if link is null.
117     *
118     * @see #isExternalLink(String)
119     * @see #isInternalLink(String)
120     */
121    public static boolean isLocalLink( final String link )
122    {
123        return ( !isExternalLink( link ) && !isInternalLink( link ) );
124    }
125
126    /**
127     * Construct a valid Doxia id.
128     *
129     * <p>
130     *   This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}.
131     * </p>
132     *
133     * @param id The id to be encoded.
134     *      May be null in which case null is returned.
135     *
136     * @return The trimmed and encoded id, or null if id is null.
137     *
138     * @see #encodeId(java.lang.String, boolean)
139     */
140    public static String encodeId( final String id )
141    {
142        return encodeId( id, false );
143    }
144
145    /**
146     * Construct a valid Doxia id.
147     *
148     * <p>
149     *   A valid Doxia id obeys the same constraints as an HTML ID or NAME token.
150     *   According to the <a href="http://www.w3.org/TR/html4/types.html#type-name">
151     *   HTML 4.01 specification section 6.2 SGML basic types</a>:
152     * </p>
153     * <p>
154     *   <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
155     *   followed by any number of letters, digits ([0-9]), hyphens ("-"),
156     *   underscores ("_"), colons (":"), and periods (".").</i>
157     * </p>
158     * <p>
159     *   According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0
160     *   section C.8. Fragment Identifiers</a>:
161     * </p>
162     * <p>
163     *   <i>When defining fragment identifiers to be backward-compatible, only
164     *   strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i>
165     * </p>
166     * <p>
167     *   To achieve this we need to convert the <i>id</i> String. Two conversions
168     *   are necessary and one is done to get prettier ids:
169     * </p>
170     * <ol>
171     *   <li>Remove whitespace at the start and end before starting to process</li>
172     *   <li>If the first character is not a letter, prepend the id with the letter 'a'</li>
173     *   <li>Any spaces are replaced with an underscore '_'</li>
174     *   <li>
175     *     Any characters not matching the above pattern are either dropped,
176     *     or replaced according to the rules specified in the
177     *     <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>.
178     *   </li>
179     * </ol>
180     * <p>
181     *   For letters, the case is preserved in the conversion.
182     * </p>
183     *
184     * <p>
185     * Here are some examples:
186     * </p>
187     * <pre>
188     * DoxiaUtils.encodeId( null )        = null
189     * DoxiaUtils.encodeId( "" )          = "a"
190     * DoxiaUtils.encodeId( "  " )        = "a"
191     * DoxiaUtils.encodeId( " _ " )       = "a_"
192     * DoxiaUtils.encodeId( "1" )         = "a1"
193     * DoxiaUtils.encodeId( "1anchor" )   = "a1anchor"
194     * DoxiaUtils.encodeId( "_anchor" )   = "a_anchor"
195     * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123"
196     * DoxiaUtils.encodeId( "   anchor" ) = "anchor"
197     * DoxiaUtils.encodeId( "myAnchor" )  = "myAnchor"
198     * </pre>
199     *
200     * @param id The id to be encoded.
201     *      May be null in which case null is returned.
202     * @param chop true if non-ASCII characters should be ignored.
203     * If false, any non-ASCII characters will be replaced as specified above.
204     *
205     * @return The trimmed and encoded id, or null if id is null.
206     * If id is not null, the return value is guaranteed to be a valid Doxia id.
207     *
208     * @see #isValidId(java.lang.String)
209     *
210     * @since 1.1.1
211     */
212    public static String encodeId( final String id, final boolean chop )
213    {
214        if ( id == null )
215        {
216            return null;
217        }
218
219        final String idd = id.trim();
220        int length = idd.length();
221
222        if ( length == 0 )
223        {
224            return "a";
225        }
226
227        StringBuilder buffer = new StringBuilder( length );
228
229        for ( int i = 0; i < length; ++i )
230        {
231            char c = idd.charAt( i );
232
233            if ( ( i == 0 ) && ( !isAsciiLetter( c ) ) )
234            {
235                buffer.append( 'a' );
236            }
237
238            if ( c == ' ' )
239            {
240                buffer.append( '_' );
241            }
242            else if ( isAsciiLetter( c ) || isAsciiDigit( c ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' )
243                            || ( c == '.' ) )
244            {
245                buffer.append( c );
246            }
247            else if ( !chop )
248            {
249                byte[] bytes;
250
251                try
252                {
253                    bytes = String.valueOf( c ).getBytes( "UTF8" );
254                }
255                catch ( UnsupportedEncodingException cannotHappen )
256                {
257                    bytes = new byte[0];
258                }
259
260                for ( int j = 0; j < bytes.length; ++j )
261                {
262                    String hex = byteToHex( bytes[j] );
263
264                    buffer.append( '%' );
265
266                    if ( hex.length() == 1 )
267                    {
268                        buffer.append( '0' );
269                    }
270
271                    buffer.append( hex );
272                }
273            }
274        }
275
276        return buffer.toString();
277    }
278
279    /**
280     * Convert a byte to it's hexadecimal equivalent.
281     *
282     * @param b the byte value.
283     * @return the result of Integer.toHexString( b & 0xFF ).
284     *
285     * @since 1.1.1
286     */
287    public static String byteToHex( final byte b )
288    {
289        return Integer.toHexString( b & MINUS_ONE );
290    }
291
292    /**
293     * Determines if the specified text is a valid id according to the rules
294     * laid out in {@link #encodeId(String)}.
295     *
296     * @param text The text to be tested.
297     *      May be null in which case false is returned.
298     *
299     * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
300     *
301     * @see #encodeId(String)
302     */
303    public static boolean isValidId( final String text )
304    {
305        if ( text == null || text.length() == 0 )
306        {
307            return false;
308        }
309
310        for ( int i = 0; i < text.length(); ++i )
311        {
312            char c = text.charAt( i );
313
314            if ( isAsciiLetter( c ) )
315            {
316                continue;
317            }
318
319            if ( ( i == 0 ) || ( c == ' ' ) || ( !isAsciiDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) )
320            {
321                return false;
322            }
323        }
324
325        return true;
326    }
327
328    private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat( "", Locale.ENGLISH );
329    private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition( 0 );
330    private static final String[] DATE_PATTERNS = new String[]
331    {
332        "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
333        "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
334        "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"
335    };
336
337    /**
338     * <p>Parses a string representing a date by trying different date patterns.</p>
339     *
340     * <p>The following date patterns are tried (in the given order):</p>
341     *
342     * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
343     *  "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
344     *  "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
345     *
346     * <p>A parse is only sucessful if it parses the whole of the input string.
347     * If no parse patterns match, a ParseException is thrown.</p>
348     *
349     * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
350     * (ignoring case) return the current date.</p>
351     *
352     * @param str the date to parse, not null.
353     *
354     * @return the parsed date, or the current date if the input String (ignoring case) was
355     *      <code>"today"</code> or <code>"now"</code>.
356     *
357     * @throws ParseException if no pattern matches.
358     * @throws NullPointerException if str is null.
359     *
360     * @since 1.1.1.
361     */
362    public static Date parseDate( final String str )
363            throws ParseException
364    {
365        if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) )
366        {
367            return new Date();
368        }
369
370        for ( int i = 0; i < DATE_PATTERNS.length; i++ )
371        {
372            DATE_PARSER.applyPattern( DATE_PATTERNS[i] );
373            DATE_PARSE_POSITION.setIndex( 0 );
374            final Date date = DATE_PARSER.parse( str, DATE_PARSE_POSITION );
375
376            if ( date != null && DATE_PARSE_POSITION.getIndex() == str.length() )
377            {
378                return date;
379            }
380        }
381
382        throw new ParseException( "Unable to parse date: " + str, -1 );
383    }
384
385      //
386     // private
387    //
388
389    private static boolean isAsciiLetter( final char c )
390    {
391        return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) );
392    }
393
394    private static boolean isAsciiDigit( final char c )
395    {
396        return ( c >= '0' && c <= '9' );
397    }
398
399    /**
400     * Determine width and height of an image. If successful, the returned SinkEventAttributes
401     * contain width and height attribute keys whose values are the width and height of the image (as a String).
402     *
403     * @param logo a String containing either a URL or a path to an image file. Not null.
404     *
405     * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
406     *
407     * @throws java.io.IOException if an error occurs during reading.
408     * @throws NullPointerException if logo is null.
409     *
410     * @since 1.1.1
411     */
412    public static MutableAttributeSet getImageAttributes( final String logo )
413            throws IOException
414    {
415        BufferedImage img = null;
416
417        if ( isExternalLink( logo ) )
418        {
419            img = ImageIO.read( new URL( logo ) );
420        }
421        else
422        {
423            img = ImageIO.read( new File( logo ) );
424        }
425
426        if ( img == null )
427        {
428            return null;
429        }
430
431        MutableAttributeSet atts = new SinkEventAttributeSet();
432        atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) );
433        atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) );
434        // add other attributes?
435
436        return atts;
437    }
438
439    private DoxiaUtils()
440    {
441        // utility class
442    }
443}