#!/usr/local/bin/python2.7
# -*- python -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# This is a Python CGI script that uses EZT to produce templated
# mirror content and GeoIP to choose the appropriate mirror
#

import sys
import os
import cgi
import stat
import random
import time
import traceback
import cStringIO

# Should be installed in the main system library
import GeoIP

# Insert this directory into PATH so that we can import ezt
this_dir = os.path.dirname(__file__) or '.'
sys.path.insert(0, this_dir)

import ezt

# Configurable stuff
MIRRORS_LIST = "/x1/www/www.apache.org/mirrors/mirrors.list"
DEFAULT_REGION = "us"
DEFAULT_TEMPLATE = "/x1/www/www.apache.org/dyn/closer.html"
DEFAULT_LOCATION = "http://www.apache.org/dyn/closer.cgi"


def get_region(environ):
    """Use GeoIP to find the client's country, falling back to
    DEFAULT_REGION on failure.

    `environ` is the WSGI/CGI environment; only REMOTE_ADDR is read.
    Returns a lower-case country code, with 'gb' reported as 'uk'.
    """
    try:
        remote_ip = environ['REMOTE_ADDR']
        gi = GeoIP.new(GeoIP.GEOIP_STANDARD)
        region = gi.country_code_by_addr(remote_ip).lower().strip()
    except Exception:
        # Deliberately best-effort: a missing REMOTE_ADDR, an unavailable
        # GeoIP database or an address with no country (None result) must
        # not break the page.  Only real errors are absorbed; SystemExit
        # and KeyboardInterrupt still propagate.
        return DEFAULT_REGION

    # Presumably the mirror database lists British mirrors under 'uk'.
    if region == 'gb':
        return 'uk'
    return region


def parse_mirrors(filename, country, preferred, mingood):
    """Parse the mirror database to find the best mirrors for a client.

    The Format of the mirror database is (last two fields are optional):

    ftp au ftp://ftp.planetmirror.com/pub/apache/dist/ 1117724635 http://example.com/logo.gif http://example.com/

    Arguments:
      filename  -- path of the mirror database file
      country   -- region code to look for first (see get_region)
      preferred -- mirror URL requested by the user, or an empty string
      mingood   -- an http mirror only counts as "good" when its timestamp
                   field is greater than this value

    Returns a dictionary with the keys 'http', 'ftp' and 'backup' (lists
    of mirror URLs), 'preferred' (the chosen mirror URL) and 'logo' and
    'link' (taken from the chosen mirror when it has both optional
    fields, otherwise None).  All URLs end with a slash.

    Raises IndexError if no mirror qualifies and there are no backup
    mirrors, and whatever open() raises if the database is unreadable.
    """
    output = {
        'http': [],
        'ftp': [],
        'backup': [],
        'preferred': None,
        'logo': None,
        'link': None,
    }

    # Read the mirror database and put it in a list of lists
    # skip empty lines and comment
    with open(filename) as fh:
        mirrors = [line.split() for line in fh
                   if line.strip() and not line.startswith('#')]

    # The archive is always available as a last-resort http mirror in the
    # default region; its timestamp is far enough in the future to pass
    # any mingood check.
    mirrors.append(['http', 'us', 'http://archive.apache.org/dist/',
                    '9999999999'])

    # Add trailing slashes where missing. Otherwise,
    # strcat("http://www.mirror.org", pathinfo=".foo.evil") would link to
    # http://www.mirror.org.foo.evil
    for mir in mirrors:
        if not mir[2].endswith('/'):
            mir[2] += '/'

    # grab the backup mirrors
    backupmirrors = [mir for mir in mirrors if mir[1] == 'Backup']

    # Grab the mirrors for the requested country or, failing that,
    # from the default region (us)
    for region in (country, DEFAULT_REGION):
        countrymirrors = [mir for mir in mirrors if mir[1] == region]
        random.shuffle(countrymirrors)
        goodmirror = None
        for mir in countrymirrors:
            if mir[0] == 'http' and int(mir[3]) > mingood:
                goodmirror = mir
                break
        if goodmirror:
            break

    # Check if the requested Preferred mirror is in the list.
    # Every URL in the list ends with a slash, so give the user-requested
    # mirror one too; otherwise a request without it could never match.
    prefmir = None
    if preferred:
        if not preferred.endswith('/'):
            preferred += '/'
        for mir in mirrors:
            if mir[2] == preferred:
                prefmir = mir
                break

    # Otherwise pick a preferred mirror from our country
    if not prefmir and goodmirror:
        prefmir = goodmirror

    if not prefmir:
        # In the worst case, choose a backup
        prefmir = random.choice(backupmirrors)

    # Record the preferred mirror and, if available, its logo and link
    # Keep the trailing-slash on the URL (it is later joined to the path_info)
    output['preferred'] = prefmir[2]
    if len(prefmir) > 5:
        output['logo'] = prefmir[4]
        output['link'] = prefmir[5]

    # Now assemble a list of all the other mirrors.
    # Keep the trailing-slash on the URL (it is later joined to the path_info)
    output['http'] = [mir[2] for mir in countrymirrors if mir[0] == 'http']
    output['ftp'] = [mir[2] for mir in countrymirrors if mir[0] == 'ftp']
    output['backup'] = [mir[2] for mir in backupmirrors]

    return output


def mirrorwrap(environ, start_response):
    """WSGI entry point: run mirrorsapp and turn any error into a
    plain-text 500 response that includes the traceback."""
    try:
        return mirrorsapp(environ, start_response)
    except Exception:
        status = "500 Oops"
        response_headers = [("content-type", "text/plain")]
        start_response(status, response_headers, sys.exc_info())
        return ["Problem running mirror.cgi, contact "
                "if it persists.\n\n" + traceback.format_exc()]


def locate_template(environ):
    """Return the path of the EZT template to render.

    The template name is the script's own filename with ".cgi" replaced
    by ".html", taken from ASF_MIRROR_FILENAME, then SCRIPT_FILENAME,
    then sys.argv[0].  If that file does not exist the same path with
    "/cgi-bin/" replaced by "/content/" is tried, and finally
    DEFAULT_TEMPLATE is used.
    """
    # Determine the correct template by noting our filesystem location
    if 'ASF_MIRROR_FILENAME' in environ:
        script_file = environ['ASF_MIRROR_FILENAME']
    elif 'SCRIPT_FILENAME' in environ:
        script_file = environ['SCRIPT_FILENAME']
    else:
        script_file = sys.argv[0]
    template_file = script_file.replace(".cgi", ".html")

    if not os.path.isfile(template_file):
        # look in docroot instead if this is in a cgi-bin dir
        template_file = template_file.replace("/cgi-bin/", "/content/")
        if not os.path.isfile(template_file):
            template_file = DEFAULT_TEMPLATE

    return template_file


def locate_mirrors(environ):
    """Return the path of the mirror database.

    The MIRRORS_LIST variable in `environ` overrides the built-in default
    when it names an existing file.
    """
    # Allow the MIRRORS_LIST environment variable to override the default
    mirrors = environ.get('MIRRORS_LIST')
    if mirrors and os.path.isfile(mirrors):
        return mirrors
    return MIRRORS_LIST  # the default


def _download_url(mirror_url, filename):
    """Join a mirror base URL and a client-supplied filename.

    The result always has exactly one slash between the two parts, so
    the filename can never be glued onto the mirror's host name
    (http://www.mirror.org + .foo.evil).  CR and LF are removed because
    the URL is sent in a Location header and they would allow
    response-header injection.
    """
    filename = filename.replace('\r', '').replace('\n', '')
    if not mirror_url.endswith('/'):
        mirror_url += '/'
    return mirror_url + filename.lstrip('/')


def mirrorsapp(environ, start_response):
    """WSGI application that renders the mirror selection page.

    Recognised request fields:
      Preferred -- mirror URL the user wants to use
      update    -- YYYYMMDDhhmm; mirrors must be newer than this
      action    -- "download" together with a non-empty `filename`
                   redirects to that file on the chosen mirror
      filename  -- file to download (see `action`)
      path      -- path below the mirror root; overrides PATH_INFO

    Returns an iterable of response body strings, as WSGI requires.
    """
    headers = []
    resp_code = '200 OK'

    # Where is the client coming from
    region = get_region(environ)

    # Was there a preferred mirror or update requirement?
    form = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ,
                            keep_blank_values=True)
    preferred = form.getfirst("Preferred", "")
    update = form.getfirst("update", "")

    # Get the last update time of the mirror database
    mirrors = locate_mirrors(environ)
    base_time = os.path.getmtime(mirrors)

    # convert from YYYYMMDDhhmm to time-since-unix-epoch
    try:
        mingood = time.mktime(time.strptime(update, "%Y%m%d%H%M"))
        # Never use a mirror more than a week old
        mingood = max(mingood, base_time - 7*24*60*60)
    except (ValueError, OverflowError, TypeError):
        # if we didn't get a time, or we can't convert it, then
        # use the time the mirror database was last updated minus 24 hours
        mingood = base_time - 24*60*60

    # Load the mirrors file and parse it out
    data = parse_mirrors(mirrors, region, preferred, mingood)

    # ======== new download tracking code ==========
    action = form.getfirst("action", "")
    filename = form.getfirst("filename", "")
    if action == 'download' and filename != '':
        url = _download_url(data['preferred'], filename)
        headers.append(('Location', url))
        # A redirect needs a 3xx status; "200 OK" plus Location only works
        # when the web server rewrites the status on the script's behalf.
        start_response('302 Found', headers)
        # Download logging is currently disabled:
        #log_download(url, data['preferred'], environ, region)
        return []
    # ====== end new download tracking code ========

    # Note location to self
    data['location'] = environ.get('SCRIPT_NAME', DEFAULT_LOCATION)

    path_param = form.getfirst("path", None)
    if path_param:
        path_info = cgi.escape(path_param, 1)
    elif 'PATH_INFO' in environ:
        # Note any PATH_INFO
        path_info = cgi.escape(environ['PATH_INFO'], 1)
        # A PATH_INFO that merely repeats the script name carries no path
        if ('SCRIPT_NAME' in environ
                and environ['PATH_INFO'] == environ['SCRIPT_NAME']):
            path_info = ''
    else:
        path_info = ''

    # The mirror URL already has a trailing slash.  Avoid doubling it up.
    if path_info.startswith('/'):
        path_info = path_info[1:]

    data['path_info'] = path_info

    template_file = locate_template(environ)

    # Print out the CGI header component
    # using xml if the filename ends with the magic '--xml' string
    if template_file.endswith('--xml.html'):
        headers.append(('Content-type', 'text/xml'))
    else:
        headers.append(('Content-type', 'text/html'))

    start_response(resp_code, headers)

    output = cStringIO.StringIO()
    template = ezt.Template(template_file)
    template.generate(output, data)
    return [output.getvalue()]


# ======== new download tracking code ==========
def log_download(url, preferred, environ, region,
                 log_path='/home/jfthomps/debug'):
    """Append one record describing a download redirect to `log_path`.

    Record format: timestamp,url,mirror,IP,region,"useragent"

    Logging is best-effort: a log file that cannot be written is ignored
    so that it never breaks the download itself.  A missing REMOTE_ADDR
    or HTTP_USER_AGENT is recorded as "-".
    """
    timestamp = int(time.time())
    record = '%s,%s,%s,%s,%s,"%s"\n' % (
        timestamp, url, preferred,
        environ.get('REMOTE_ADDR', '-'), region,
        environ.get('HTTP_USER_AGENT', '-'))
    try:
        # "with" guarantees the file is closed even if the write fails
        with open(log_path, 'a') as fh:
            fh.write(record)
    except (IOError, OSError):
        pass
# ====== end new download tracking code ========


if __name__ == '__main__':
    #from flup.server.fcgi import WSGIServer
    from flup.server.cgi import WSGIServer
    WSGIServer(mirrorwrap).run()