 import java.util.Iterator;
 import org.apache.log4j.Logger;
/**
 * ResourceRequest implementation that uses java.net.URL as the backend.
 *
 * Differences from other ResourceRequests:
 *
 * setRequestMethod() - Allows us to change the request type (HEAD, etc).
 *
 * getContentLength() - Returns the length/size of the content represented by
 * this resource.  Can be used by clients with setRequestMethod( "HEAD" ) to
 * find the size of a remote resource without doing a full fetch.
 *
 * @author <a href="">Kevin A. Burton</a>
 * @version $Id: 561366 2007-07-31 15:58:29Z rahul $
 */
 46  0
 public class URLResourceRequest extends BaseResourceRequest implements ResourceRequest {
 48  0
     private static Logger log = Logger.getLogger( URLResourceRequest.class.getName() );
     public static final String ACCEPT_ENCODING_HEADER = "Accept-Encoding";
     public static final String IF_NONE_MATCH_HEADER = "If-None-Match";
     public static final String GZIP_ENCODING = "gzip";
     public static final String USER_AGENT_HEADER = "User-Agent";
      * Enable RFC 3228 HTTP Delta for feeds.
 64  0
     public static boolean ENABLE_HTTP_DELTA_FEED_IM = false;
 66  0
     public static String USER_AGENT
         = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; Gecko/20021130";
 69  0
     public static String USER_AGENT_MOZILLA
         = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1) Gecko/20021130";
      * Not used anymore.  Provided for historical reasons.
     public static final String REFERER
         = "";
     public static final int MAX_CONTENT_LENGTH = 1000000;
 80  0
     private URL _url = null;
 82  0
     private URLConnection _urlConnection = null;
 84  0
     private InputStream inputStream = null;
 86  0
     private boolean initConnection = false;
     public void init() throws IOException {
 94  0
         String resource = this.getResource();
         //if we are offline... we don't need to init.
 97  0
         if ( ResourceRequestFactory.isOffline() ) { return; } 
         //pull from the HTCache if it is enabled and then short-circuit so that
         //we don't fetch from the network.
         //NOTE: currently removed because the htcache wasn't portable. I can OSS
         //this in the future if necessary
         // if ( ResourceRequestFactory.isTransparentHTCacheEnabled() &&
         //     HTCache.hasContentInCache( this.getResource() ) ) {
         //    //get the input stream we can use from the HTCache.
         //    this.inputStream = HTCache.getContentAsInputStream( resource );
         //  return;
         // }
 114  0
         _url = new URL( this.getResource() );
 115  0
         _urlConnection = _url.openConnection();
 117  0
     /**
      * Init the actual connection.  Should be called AFTER init() but before
      * getInputStream() so that we can set any runtime params (requestMethod,
      * etc).  If getInputStream() is called without an initConnection() we do
      * this automatically.  initConnection() might not want to be called when
      * doing a HEAD request.
      */
     public void initConnection() throws NetworkException {
 130  0
         long before = System.currentTimeMillis();
 132  0
         initConnection = true;
 134  0
         //FIXME: do smart user agent detection.  if this is a .html file we can
         //set it to us Mozilla and if not we can use NewsMonster
         //_urlConnection.setRequestProperty( "Referer", REFERER );
 140  0
         String resource = this.getResource();
         //set the user agent if it hasn't ALREADY been set by the caller.
 143  0
         if ( getRequestHeaderField( USER_AGENT_HEADER ) == null ) {
 144  0
             _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT );
 147  0
         _urlConnection.setRequestProperty( ACCEPT_ENCODING_HEADER, GZIP_ENCODING );
         //copy over any headers set in the request..
 151  0
         Iterator it = getRequestHeaderFields();
 153  0
         while ( it.hasNext() ) {
 155  0
             String key = (String);
 157  0
             _urlConnection.setRequestProperty( key, getRequestHeaderField( key ) );
 159  0
 161  0
         if ( _urlConnection instanceof HttpURLConnection ) {
 163  0
             HttpURLConnection httpURLConn = (HttpURLConnection)_urlConnection;
 165  0
             httpURLConn.setFollowRedirects( getFollowRedirects() );
 166  0
             httpURLConn.setInstanceFollowRedirects( getFollowRedirects() );
 168  0
             if ( this.getIfModifiedSince() != -1 )
 169  0
                 httpURLConn.setIfModifiedSince( this.getIfModifiedSince() );
 171  0
             if ( getEtag() != null ) {
 172  0
                 httpURLConn.setRequestProperty( IF_NONE_MATCH_HEADER, getEtag() );
                 //now support RFC3229 HTTP Delta
                 //A-IM: feed, gzip
 177  0
                 if ( ENABLE_HTTP_DELTA_FEED_IM ) {
                     //note that this will return HTTP 226 if used.
 182  0
                     httpURLConn.setRequestProperty( "A-IM", "feed, gzip" );
             try {
 190  0
                 //setResource( getRedirectedResource() );
 194  0
                 this.setResponseCode( httpURLConn.getResponseCode() ); 
 196  0
             } catch ( IOException e ) {
 197  0
                 throw new NetworkException( e );
 198  0
 202  0
         int contentLength = _urlConnection.getContentLength();
         //bigger than 1 meg and it is a remote document (it is safe to process
         //local documents)
 206  0
         if ( contentLength > MAX_CONTENT_LENGTH &&
              this.getResource().startsWith( "file:" ) == false ) {
             //NOTE: make 100% sure this doens't just go ahead and download the
             //file FIRST before doing a HEAD.  I think that's what happens but I
             //might be wrong.
 213  0
             throw new NetworkException( "Content is too large - " + contentLength + " - " + getResource() );
 217  0
         long after = System.currentTimeMillis();
 219  0
         log.debug( getResource() + " - init duration: " + (after-before) );
 221  0
 223  0
     java.lang.reflect.Field FIELD_HTTP_URL_CONNECTION_HTTP = null;
 224  0
     java.lang.reflect.Field FIELD_HTTP_CLIENT_URL = null;
      * This method used Reflection to pull out the redirected URL in
      *  Internally
      * stores a reference to which then in turn does
      * all the redirection and stores the redirect  We just use
      * reflection to FETCH this URL and then call toString to get the correct
      * value.
      * Java needs the concept of readonly private variables.
     public String getResourceFromRedirect() {
         try {
 242  0
             if ( FIELD_HTTP_URL_CONNECTION_HTTP == null ) {
                 //Note: when using a FILE URL this won't work!                
 245  0
                 FIELD_HTTP_URL_CONNECTION_HTTP = _urlConnection.getClass().getDeclaredField( "http" );
 246  0
                 FIELD_HTTP_URL_CONNECTION_HTTP.setAccessible( true );
 250  0
             Object http = FIELD_HTTP_URL_CONNECTION_HTTP.get( _urlConnection );
             //when has already cleaned itself up 'http' will be
             //null here.
 254  0
             if ( http == null )
 255  0
                 return getResource();
 257  0
             if ( FIELD_HTTP_CLIENT_URL == null ) {
 259  0
                 FIELD_HTTP_CLIENT_URL = http.getClass().getDeclaredField( "url" );
 260  0
                 FIELD_HTTP_CLIENT_URL.setAccessible( true );
 264  0
             Object url = FIELD_HTTP_CLIENT_URL.get( http );
             //this will be a and now I can call the toString method
             //on it which will return our full URI.
 268  0
             return url.toString();
 270  0
         } catch ( Throwable t ) {
             //log.error( t );
 272  0
             return getResource();
     public InputStream getInputStream() throws NetworkException {
         try {
 280  0
             return _getInputStream();
 282  0
         } catch ( IOException e ) {
 284  0
             String message = null;
             //the modern VM buries the FileNotFoundException which prevents a
             //catch.  Very very ugly.
 288  0
             if ( e.getCause() instanceof FileNotFoundException ) {
 289  0
                 message = "File not found: " + e.getCause().getMessage();
             } else {
 291  0
                 message = e.getMessage();
 294  0
             throw new NetworkException( message, e, this, _url, _urlConnection );
     public InputStream _getInputStream() throws IOException {
 306  0
         if ( ! initConnection ) { initConnection(); } 
 308  0
         String resource = this.getResource();
         //if we haven't pulled from the cache (as above) and we are offline we
         //need to throw an exception.
 312  0
         if ( ResourceRequestFactory.isOffline() ) {
             //see if we can return from the HTCache.
             // if ( ResourceRequestFactory.isTransparentHTCacheEnabled() &&
             //     HTCache.hasContentInCache( resource ) )
             //    return HTCache.getContentAsInputStream( resource );
             //if not we should throw an exception
 320  0
             throw new IOException( "ResourceRequestFactory is offline and content was not in cache - " +
                                    resource );
         //if we are using an input stream NOT from init() 
 326  0
         if ( this.inputStream == null ) {
 328  0
             this.inputStream = _urlConnection.getInputStream();
 329  0
             this.inputStream = new AdvancedInputStream( this.inputStream, this );
             //first decompress
 332  0
             if ( GZIP_ENCODING.equals( _urlConnection.getContentEncoding() ) ) {
                 //note.  the advanced input stream must be wrapped by a GZIP
                 //input stream and not vice-versa or we will end up with
                 //incorrect results.
 338  0
                 this.inputStream = new GZIPInputStream( this.inputStream );
             // if ( ResourceRequestFactory.isTransparentHTCacheEnabled() ) {
             //     System.out.println( "cache store for: " +
             //                         resource + " as " +
             //                         HTCache.getContentAsPath( resource ) );
             //     //FIXME: performance improvement... don't write do disk and then
             //     //read from disk.?
             //     //store this content from the network and save it in the cache.  Then fetch it and return
             // resource, this.inputStream );
             //     return HTCache.getContentAsInputStream( resource );
             // }
 360  0
         setResource( getResourceFromRedirect() );
         //this is potentially teh cached input stream created if we have used
         //the HTCache.
 364  0
         return inputStream;
     /**
      * Set the RequestMethod of this URLConnection.
      */
     public void setRequestMethod( String method ) throws NetworkException {
         try { 
 377  0
             if ( _urlConnection instanceof HttpURLConnection ) {
 379  0
                 ((HttpURLConnection)_urlConnection).setRequestMethod( method );
 383  0
         } catch ( ProtocolException pe ) {
 385  0
             NetworkException ne = new NetworkException( pe.getMessage() );
 386  0
             ne.initCause( pe );
 387  0
             throw ne;
 389  0
 391  0
     public int getContentLength() throws IOException {
 400  0
         if ( ! initConnection ) { initConnection(); } 
         //if ( _urlConnection instanceof HttpURLConnection ) {
 404  0
         return  _urlConnection.getContentLength();
     public String getHeaderField( String name ) {
 409  0
         return  _urlConnection.getHeaderField( name );