Coverage Report - org.apache.commons.feedparser.locate.ResourceExpander
 
Classes in this File Line Coverage Branch Coverage Complexity
ResourceExpander
0%
0/80
0%
0/50
5.125
 
 1  
 /*
 2  
  * Copyright 1999,2004 The Apache Software Foundation.
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  * 
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.feedparser.locate;
 18  
 
 19  
 import java.util.regex.Pattern;
 20  
 
 21  
 import org.apache.log4j.Logger;
 22  
 
 23  
 /**
 24  
  *
 25  
  * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
 26  
  * @version $Id: ResourceExpander.java 373622 2006-01-30 22:53:00Z mvdb $
 27  
  */
 28  0
 public class ResourceExpander {
 29  
 
 30  0
     private static Logger log = Logger.getLogger( ResourceExpander.class );
 31  
 
 32  
     /** A regexp to determine if a URL has a scheme, such as "http://foo.com".
 33  
      */
 34  0
     protected static Pattern schemePattern = Pattern.compile("^\\w*://.*");
 35  
     
 36  
     /**
 37  
      * Expand a link relavant to the current site.  This takes care of links
 38  
      * such as
 39  
      *
 40  
      * /foo.html -> http://site.com/base/foo.html
 41  
      *
 42  
      * foo.html -> http://site.com/base/foo.html
 43  
      *
 44  
      * Links should *always* be expanded before they are used.
 45  
      *
 46  
      * This is because if we use the URL http://site.com/base then we don't know
 47  
      * if it's a directory or a file.  http://site.com/base/ would be a directory.
 48  
      * 
 49  
      * Note that all resource URLs will have correct trailing slashes.  If the URL
 50  
      * does not end with / then it is a file URL and not a directory.
 51  
      * 
 52  
      * @param resource The absolute base URL that will be used to expand the
 53  
      * link, such as "http://www.codinginparadise.org".
 54  
      * @param link The link to possibly expand, such as "/index.rdf" or
 55  
      * "http://www.somehost.com/somepage.html".
 56  
      *
 57  
      * 
 58  
      */
 59  
     public static String expand( String resource, String link ) {
 60  
 
 61  0
         if ( link == null )
 62  0
             return null;
 63  
 
 64  
         //make sure we can use this.
 65  0
         if ( !isValidScheme( link ) )
 66  0
             return link;
 67  
 
 68  
         //nothing if ALREADY relativized
 69  0
         if ( isExpanded( link ) )
 70  0
             return link;
 71  
 
 72  
         //    From: http://www.w3.org/Addressing/rfc1808.txt
 73  
         //
 74  
         //    If the parse string begins with a double-slash "//", then the
 75  
         //    substring of characters after the double-slash and up to, but not
 76  
         //    including, the next slash "/" character is the network
 77  
         //    location/login (<net_loc>) of the URL.  If no trailing slash "/"
 78  
         //    is present, the entire remaining parse string is assigned to
 79  
         //    <net_loc>.  The double- slash and <net_loc> are removed from the
 80  
         //    parse string before
 81  
         //FIXME: What happens if resource is a "file://" scheme?
 82  0
         if ( link.startsWith( "//" ) ) {
 83  
 
 84  0
             return "http:" + link;
 85  
 
 86  
         }
 87  
 
 88  
         //keep going
 89  0
         if ( link.startsWith( "/" ) ) {
 90  
 
 91  0
             link = getSite( resource ) + link;
 92  
 
 93  0
             return link;
 94  
 
 95  0
         } else if ( link.startsWith( "#" ) ) {
 96  
 
 97  0
             link = resource + link;
 98  
 
 99  0
             return link;
 100  
 
 101  0
         } else if ( link.startsWith( ".." ) ) {
 102  
 
 103  
             //ok.  We need to get rid of these .. directories.
 104  
 
 105  0
             String base = getBase( resource ) + "/";
 106  
 
 107  0
             while ( link.startsWith( ".." ) ) {
 108  
 
 109  
                 //get rid of the first previous dir in the link
 110  0
                 int begin = 2;
 111  0
                 if ( link.length() > 2 && link.charAt( 2 ) == '/' )
 112  0
                     begin = 3;
 113  
 
 114  0
                 link = link.substring( begin, link.length() );
 115  
 
 116  
                 //get rid of the last directory in the resource
 117  
 
 118  0
                 int end = base.length();
 119  
 
 120  0
                 if ( base.endsWith( "/" ) )
 121  0
                      --end;
 122  
 
 123  0
                 base = base.substring( 0, base.lastIndexOf( "/", end - 1 ) );
 124  
 
 125  0
             }
 126  
 
 127  0
             link = base + "/" + link;
 128  
 
 129  0
             return link;
 130  
 
 131  
         }
 132  
 
 133  
         // If the resource ends with a common file ending, then chop
 134  
         // off the file ending before adding the link
 135  
         // Is this rfc1808 compliant? Brad Neuberg, bkn3@columbia.edu
 136  0
         resource = getBase(resource);
 137  0
         if ( link.startsWith( "http://" ) == false ) {
 138  
 
 139  0
             link = resource + "/" + link;
 140  0
             log.debug("link="+link);
 141  
 
 142  
         }
 143  
 
 144  0
         return link;
 145  
 
 146  
     }
 147  
 
 148  
     /**
 149  
      * Return true if the given link is ALREADY relativized..
 150  
      *
 151  
      * 
 152  
      */
 153  
     public static boolean isExpanded( String resource ) {
 154  0
         return (resource.startsWith( "http://" ) ||
 155  
                 resource.startsWith( "file://" ));
 156  
     }
 157  
     
 158  
     /**
 159  
      * Return true if this is an valid scheme and should be expanded.
 160  
      *
 161  
      * 
 162  
      */
 163  
     public static boolean isValidScheme( String resource ) {
 164  0
         if (hasScheme(resource) == false)
 165  0
             return true;
 166  
         
 167  
         //only on file: and http:
 168  
 
 169  0
         if ( resource.startsWith( "http:" ) )
 170  0
             return true;
 171  
 
 172  0
         if ( resource.startsWith( "file:" ) )
 173  0
             return true;
 174  
 
 175  0
         return false;
 176  
         
 177  
     }
 178  
     
 179  
     /**
 180  
      * Determines if the given resource has a scheme. (i.e. does it start with
 181  
      * "http://foo.com" or does it just have "foo.com").
 182  
      */
 183  
     public static boolean hasScheme( String resource ) {
 184  0
         return schemePattern.matcher( resource ).matches();
 185  
         
 186  
     }
 187  
 
 188  
     /**
 189  
      * Get the site for this resource.  For example:
 190  
      *
 191  
      * http://www.foo.com/directory/index.html
 192  
      *
 193  
      * we will return
 194  
      *
 195  
      * http://www.foo.com
 196  
      *
 197  
      * for file: URLs we return file://
 198  
      *
 199  
      * 
 200  
      */
 201  
     public static String getSite( String resource ) {
 202  
 
 203  0
         if ( resource.startsWith( "file:" ) ) {
 204  0
             return "file://";
 205  
         } 
 206  
 
 207  
         //start at 8 which is the width of http://
 208  0
         int end = resource.indexOf( "/", 8 );
 209  
 
 210  0
         if ( end == -1 ) {
 211  
 
 212  0
             end = resource.length();
 213  
 
 214  
         } 
 215  
 
 216  0
         return resource.substring( 0, end );
 217  
 
 218  
     }
 219  
 
 220  
     /**
 221  
      * Given a URL get the domain name.  
 222  
      *
 223  
      * 
 224  
      */
 225  
     public static String getDomain( String resource ) {
 226  
 
 227  0
         String site = getSite( resource );
 228  
 
 229  0
         int firstIndex = -1;
 230  0
         int indexCount = 0;
 231  
 
 232  0
         int index = site.length();
 233  
 
 234  0
         while ( (index = site.lastIndexOf( ".", index-1 )) != -1 ) {
 235  
 
 236  0
             ++indexCount;
 237  
 
 238  0
             if ( indexCount == 2 )
 239  0
                 break;
 240  
 
 241  
         }
 242  
 
 243  0
         int begin = 7; // http:// length
 244  0
         if ( indexCount >= 2 )
 245  0
             begin = index + 1;
 246  
 
 247  0
         return site.substring( begin, site.length() );
 248  
         
 249  
     }
 250  
     
 251  
     /**
 252  
      * Get the base of this URL.  For example if we are given:
 253  
      *
 254  
      * http://www.foo.com/directory/index.html
 255  
      *
 256  
      * we will return
 257  
      *
 258  
      * http://www.foo.com/directory
 259  
      *
 260  
      *
 261  
      * 
 262  
      */
 263  
     public static String getBase( String resource ) {
 264  
 
 265  
         //FIXME: Brad says this method is totally broken.
 266  0
         if ( resource == null )
 267  0
             return null;
 268  
         
 269  0
         int begin = "http://".length() + 1;
 270  
         
 271  0
         int end = resource.lastIndexOf( "/" );
 272  
         
 273  0
         if ( end == -1 || end <= begin ) {
 274  
             //probaby a URL like http://www.cnn.com
 275  
             
 276  0
             end = resource.length();
 277  
             
 278  
         } 
 279  0
         return resource.substring( 0, end );
 280  
         
 281  
     } 
 282  
 
 283  
     public static void main( String[] args ) throws Exception {
 284  
 
 285  0
         System.out.println( expand( "http://peerfear.org/foo/bar/", "../../blog" ) );
 286  
 
 287  0
         System.out.println( expand( "http://peerfear.org/foo/bar/", "../../index.html" ) );
 288  
 
 289  0
         System.out.println( expand( "http://peerfear.org/blog/", ".." ) );
 290  
 
 291  0
         System.out.println( expand( "http://peerfear.org", "/blog" ) );
 292  0
         System.out.println( expand( "http://peerfear.org", "http://peerfear.org" ) );
 293  
 
 294  0
         System.out.println( expand( "http://peerfear.org", "blog" ) );
 295  0
         System.out.println( expand( "http://peerfear.org/blog", "foo/bar" ) );
 296  
 
 297  0
         System.out.println( expand( "file://projects/newsmonster/", "blog" ) );
 298  
 
 299  0
         System.out.println( expand( "file:/projects/ksa/src/java/ksa/test/TestFeedTask_WithRelativePath.rss"
 300  
                                       , "/blog" ) );        
 301  0
     }
 302  
 
 303  
 }
 304