Coverage Report - org.apache.commons.feedparser.locate.ResourceExpander
Classes in this File Line Coverage Branch Coverage Complexity
  * Copyright 1999,2004 The Apache Software Foundation.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *     http://www.apache.org/licenses/LICENSE-2.0
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
 package org.apache.commons.feedparser.locate;
 import java.util.regex.Pattern;
 import org.apache.log4j.Logger;
  * @author <a href="">Kevin A. Burton</a>
  * @version $Id: 373622 2006-01-30 22:53:00Z mvdb $
 28  0
 public class ResourceExpander {
 30  0
     private static Logger log = Logger.getLogger( ResourceExpander.class );
     /** A regexp to determine if a URL has a scheme, such as "".
 34  0
     protected static Pattern schemePattern = Pattern.compile("^\\w*://.*");
      * Expand a link relavant to the current site.  This takes care of links
      * such as
      * /foo.html ->
      * foo.html ->
      * Links should *always* be expanded before they are used.
      * This is because if we use the URL then we don't know
      * if it's a directory or a file. would be a directory.
      * Note that all resource URLs will have correct trailing slashes.  If the URL
      * does not end with / then it is a file URL and not a directory.
      * @param resource The absolute base URL that will be used to expand the
      * link, such as "".
      * @param link The link to possibly expand, such as "/index.rdf" or
      * "".
     public static String expand( String resource, String link ) {
 61  0
         if ( link == null )
 62  0
             return null;
         //make sure we can use this.
 65  0
         if ( !isValidScheme( link ) )
 66  0
             return link;
         //nothing if ALREADY relativized
 69  0
         if ( isExpanded( link ) )
 70  0
             return link;
         //    From:
         //    If the parse string begins with a double-slash "//", then the
         //    substring of characters after the double-slash and up to, but not
         //    including, the next slash "/" character is the network
         //    location/login (<net_loc>) of the URL.  If no trailing slash "/"
         //    is present, the entire remaining parse string is assigned to
         //    <net_loc>.  The double- slash and <net_loc> are removed from the
         //    parse string before
         //FIXME: What happens if resource is a "file://" scheme?
 82  0
         if ( link.startsWith( "//" ) ) {
 84  0
             return "http:" + link;
         //keep going
 89  0
         if ( link.startsWith( "/" ) ) {
 91  0
             link = getSite( resource ) + link;
 93  0
             return link;
 95  0
         } else if ( link.startsWith( "#" ) ) {
 97  0
             link = resource + link;
 99  0
             return link;
 101  0
         } else if ( link.startsWith( ".." ) ) {
             //ok.  We need to get rid of these .. directories.
 105  0
             String base = getBase( resource ) + "/";
 107  0
             while ( link.startsWith( ".." ) ) {
                 //get rid of the first previous dir in the link
 110  0
                 int begin = 2;
 111  0
                 if ( link.length() > 2 && link.charAt( 2 ) == '/' )
 112  0
                     begin = 3;
 114  0
                 link = link.substring( begin, link.length() );
                 //get rid of the last directory in the resource
 118  0
                 int end = base.length();
 120  0
                 if ( base.endsWith( "/" ) )
 121  0
 123  0
                 base = base.substring( 0, base.lastIndexOf( "/", end - 1 ) );
 125  0
 127  0
             link = base + "/" + link;
 129  0
             return link;
         // If the resource ends with a common file ending, then chop
         // off the file ending before adding the link
         // Is this rfc1808 compliant? Brad Neuberg,
 136  0
         resource = getBase(resource);
 137  0
         if ( link.startsWith( "http://" ) == false ) {
 139  0
             link = resource + "/" + link;
 140  0
 144  0
         return link;
      * Return true if the given link is ALREADY relativized..
     public static boolean isExpanded( String resource ) {
 154  0
         return (resource.startsWith( "http://" ) ||
                 resource.startsWith( "file://" ));
      * Return true if this is an valid scheme and should be expanded.
     public static boolean isValidScheme( String resource ) {
 164  0
         if (hasScheme(resource) == false)
 165  0
             return true;
         //only on file: and http:
 169  0
         if ( resource.startsWith( "http:" ) )
 170  0
             return true;
 172  0
         if ( resource.startsWith( "file:" ) )
 173  0
             return true;
 175  0
         return false;
      * Determines if the given resource has a scheme. (i.e. does it start with
      * "" or does it just have "").
     public static boolean hasScheme( String resource ) {
 184  0
         return schemePattern.matcher( resource ).matches();
      * Get the site for this resource.  For example:
      * we will return
      * for file: URLs we return file://
     public static String getSite( String resource ) {
 203  0
         if ( resource.startsWith( "file:" ) ) {
 204  0
             return "file://";
         //start at 8 which is the width of http://
 208  0
         int end = resource.indexOf( "/", 8 );
 210  0
         if ( end == -1 ) {
 212  0
             end = resource.length();
 216  0
         return resource.substring( 0, end );
      * Given a URL get the domain name.  
     public static String getDomain( String resource ) {
 227  0
         String site = getSite( resource );
 229  0
         int firstIndex = -1;
 230  0
         int indexCount = 0;
 232  0
         int index = site.length();
 234  0
         while ( (index = site.lastIndexOf( ".", index-1 )) != -1 ) {
 236  0
 238  0
             if ( indexCount == 2 )
 239  0
 243  0
         int begin = 7; // http:// length
 244  0
         if ( indexCount >= 2 )
 245  0
             begin = index + 1;
 247  0
         return site.substring( begin, site.length() );
      * Get the base of this URL.  For example if we are given:
      * we will return
     public static String getBase( String resource ) {
         //FIXME: Brad says this method is totally broken.
 266  0
         if ( resource == null )
 267  0
             return null;
 269  0
         int begin = "http://".length() + 1;
 271  0
         int end = resource.lastIndexOf( "/" );
 273  0
         if ( end == -1 || end <= begin ) {
             //probaby a URL like
 276  0
             end = resource.length();
 279  0
         return resource.substring( 0, end );
     public static void main( String[] args ) throws Exception {
 285  0
         System.out.println( expand( "", "../../blog" ) );
 287  0
         System.out.println( expand( "", "../../index.html" ) );
 289  0
         System.out.println( expand( "", ".." ) );
 291  0
         System.out.println( expand( "", "/blog" ) );
 292  0
         System.out.println( expand( "", "" ) );
 294  0
         System.out.println( expand( "", "blog" ) );
 295  0
         System.out.println( expand( "", "foo/bar" ) );
 297  0
         System.out.println( expand( "file://projects/newsmonster/", "blog" ) );
 299  0
         System.out.println( expand( "file:/projects/ksa/src/java/ksa/test/TestFeedTask_WithRelativePath.rss"
                                       , "/blog" ) );        
 301  0