Coverage Report - org.apache.commons.pipeline.stage.HttpFileDownloadStage
 
Classes in this File Line Coverage Branch Coverage Complexity
HttpFileDownloadStage
49%
24/49
38%
6/16
0
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *     http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.commons.pipeline.stage;
 19  
 
 20  
 import java.io.BufferedInputStream;
 21  
 import java.io.BufferedOutputStream;
 22  
 import java.io.File;
 23  
 import java.io.FileOutputStream;
 24  
 import java.io.IOException;
 25  
 import java.io.InputStream;
 26  
 import java.io.OutputStream;
 27  
 import java.net.HttpURLConnection;
 28  
 import java.net.MalformedURLException;
 29  
 import java.net.URL;
 30  
 
 31  
 import org.apache.commons.logging.Log;
 32  
 import org.apache.commons.logging.LogFactory;
 33  
 import org.apache.commons.pipeline.StageException;
 34  
 import org.apache.commons.pipeline.validation.ConsumedTypes;
 35  
 import org.apache.commons.pipeline.validation.ProducedTypes;
 36  
 
 37  
 
 38  
 /**
 39  
  * This {@link org.apache.commons.pipeline.Pipeline$Stage Stage} provides the 
 40  
  * functionality needed to retrieve data from an HTTP URL. Multipart responses 
 41  
  * are not yet supported.
 42  
  */
 43  
 @ConsumedTypes({URL.class, String.class})
 44  
 @ProducedTypes({File.class})
 45  
 public class HttpFileDownloadStage extends BaseStage {
 46  
     private static final int BUFFER_SIZE = 10000;
 47  2
     private String workDir = null;
 48  2
     private Log log = LogFactory.getLog(HttpFileDownloadStage.class);
 49  
     
 50  4
     public HttpFileDownloadStage() { }
 51  
     
 52  
     /**
 53  
      * Creates a new HttpFileDownloadStage which will download files to the
 54  
      * specified work directory.
 55  
      * @param workDir the path to which files will be downloaded.
 56  
      */
 57  0
     public HttpFileDownloadStage(String workDir) {
 58  0
         this.workDir = workDir;
 59  0
     }
 60  
     
 61  
     /**
 62  
      * Removes a java.net.URL (an HTTP URL) or string representing a URL from 
 63  
      * the input queue, and then retrieves the data at that URL and stores it
 64  
      * in a temporary file. The file is stored in the directory specified by 
 65  
      * {@link #setWorkDir(String) setWorkDir()}, or to the system default 
 66  
      * temporary directory if no work directory is set.
 67  
      *
 68  
      * @param obj The URL from which to download data.
 69  
      * @throws IllegalArgumentException if the parameter obj is not a string or 
 70  
      * an instance of {@link java.net.URL}.
 71  
      * @throws StageException if there is an error retrieving data from the 
 72  
      * URL specified.
 73  
      */
 74  
     public void process(Object obj) throws StageException {
 75  
         //Map params = new HashMap();
 76  
         
 77  
         URL url;
 78  
         try {
 79  2
             if (obj instanceof String) {
 80  
 //                String loc = (String) obj;
 81  
 //                int paramIndex = loc.indexOf('?');
 82  
 //                if (paramIndex > 0) {
 83  
 //                    url = new URL(loc.substring(0, paramIndex));
 84  
 //                    for (StringTokenizer st = new StringTokenizer(loc.substring(paramIndex + 1), "&"); st.hasMoreTokens();) {
 85  
 //                        String tok = st.nextToken();
 86  
 //                        int eqIndex = tok.indexOf('=');
 87  
 //                        if (eqIndex > 0) {
 88  
 //                            params.put(tok.substring(0, eqIndex), tok.substring(eqIndex + 1));
 89  
 //                        }
 90  
 //                        else {
 91  
 //                            params.put(tok, null);
 92  
 //                        }
 93  
 //                    }
 94  
 //                }
 95  
 //                else {
 96  1
                 url = new URL((String) obj);
 97  
 //                }
 98  1
             } else if (obj instanceof URL) {
 99  1
                 url = (URL) obj;
 100  
             } else {
 101  0
                 throw new IllegalArgumentException("Unrecognized parameter class to process() for HttpFileDownload: " + obj.getClass().getName() + "; must be URL or String");
 102  
             }
 103  0
         } catch (MalformedURLException e) {
 104  0
             throw new StageException(this, "Malformed URL: " + obj, e);
 105  2
         }
 106  
         
 107  2
         log.debug("Retrieving data from " + url.toString());
 108  
         
 109  
 //        try {
 110  
 //            url = handleRedirects(url);
 111  
 //        }
 112  
 //        catch (Exception e) { //catches MalformedURLException, IOException
 113  
 //            throw new StageException("An error was encountered attempting to follow URL redirects from " + url.toString(), e);
 114  
 //        }
 115  
         
 116  2
         HttpURLConnection con = null;
 117  
         try {
 118  2
             con = (java.net.HttpURLConnection) url.openConnection();
 119  
 //            if (!params.isEmpty()) {
 120  
 //                con.setRequestMethod("GET");
 121  
 //                for (Iterator iter = params.entrySet().iterator(); iter.hasNext();) {
 122  
 //                    Map.Entry entry = (Map.Entry) iter.next();
 123  
 //                    con.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
 124  
 //                }
 125  
 //            }
 126  
 
 127  2
             File workDir = (this.workDir == null) ? null : new File(this.workDir);
 128  2
             File workFile = File.createTempFile("http-file-download","tmp", workDir);
 129  
             
 130  2
             InputStream in = new BufferedInputStream(con.getInputStream());
 131  2
             OutputStream out = new BufferedOutputStream(new FileOutputStream(workFile, false));
 132  2
             byte[] buffer = new byte[BUFFER_SIZE]; //attempt to read 10k at a time
 133  2
             for (int results = 0; (results = in.read(buffer)) != -1;) {
 134  2
                 out.write(buffer, 0, results);
 135  
             }
 136  2
             out.close();
 137  2
             in.close();
 138  
             
 139  2
             this.emit(workFile);
 140  0
         } catch (IOException e) {
 141  0
             throw new StageException(this, "An error occurred downloading a data file from " + url.toString(), e);
 142  
         } finally {
 143  2
             con.disconnect();
 144  2
         }        
 145  2
     }
 146  
     
 147  
     
 148  
     /**
 149  
      * Sets the working directory for the file download. If the directory does
 150  
      * not already exist, it will be created during the preprocess() step.
 151  
      * If you do not set this directory, the work directory will be the
 152  
      * default temporary directory for your machine type.
 153  
      */
 154  
     public void setWorkDir(String workDir) {
 155  0
         this.workDir = workDir;
 156  0
     }
 157  
     
 158  
     /**
 159  
      * Returns the name of the file download directory.
 160  
      */
 161  
     public String getWorkDir() {
 162  0
         return this.workDir;
 163  
     }
 164  
     
 165  
     /**
 166  
      * Follows redirects from the specified URL and recursively returns the destination
 167  
      * URL. This method does not check for circular redirects, so it is possible that a malicious
 168  
      * site could force this method into infinite recursion.
 169  
      *
 170  
      * TODO: Add a max_hops parameterized version
 171  
      */
 172  
     public URL handleRedirects(URL url) throws IOException, MalformedURLException {
 173  0
         java.net.HttpURLConnection.setFollowRedirects(false);
 174  0
         HttpURLConnection con = (HttpURLConnection) url.openConnection();
 175  0
         int response = con.getResponseCode();
 176  0
         log.debug("Response code for " + url + " = " + response);
 177  
         
 178  0
         if (response == java.net.HttpURLConnection.HTTP_MOVED_PERM || response == java.net.HttpURLConnection.HTTP_MOVED_TEMP) {
 179  0
             String location = con.getHeaderField("Location");
 180  0
             log.debug("Handling redirect to location: " + location);
 181  
             
 182  0
             if (location.startsWith("http:")) {
 183  0
                 url = new URL(location);
 184  0
             } else if (location.startsWith("/")) {
 185  0
                 url = new URL("http://" + url.getHost() + location);
 186  
             } else {
 187  0
                 url = new URL(con.getURL(), location);
 188  
             }
 189  
             
 190  0
             url = handleRedirects(url); // to handle nested redirections
 191  
         }
 192  
         
 193  0
         return url;
 194  
     }
 195  
 }