--[[
    This is a LUA CGI script that uses LibEZT to produce templated mirror content.
    It uses the output from the Apache GeoIP module to choose the appropriate mirror(s).

    The script supports the following optional URL parameters:
        cca2                : override the country code
        preferred/Preferred : sets the preferred server if available,
                              otherwise it is chosen at random
        preferred=true      : return only the chosen preferred URL as text/plain
        as_json/asjson      : don't process the template, but return the mirror data as JSON
        action=download together with filename :
                              generate a redirect to the file on the preferred mirror
        action=info         : dump diagnostic information as text/plain
        action=catlog       : dump the archive lookup log as text/plain
        archive_aware       : enables archive lookups for THIS request only
                              (the default is the ARCHIVE_AWARE constant : false)
        f                   : template page to render (must live below /var/www/ or /x1/www/)
        path                : override the path_info
        debug               : sets defs.debug for the template

    This script is dist, attic and archive aware.
    If the target's first path-component (TLP) has a corresponding file
    ATTIC_DIR/TLP.html, then the script redirects to that page.
    If the final target is not in dist, it uses template archive.html ;
    if archive aware, it looks up the target on archive.apache.org with a HEAD request.
    The lookups are cached ; forever for positive results, CACHE_TIMEOUT seconds otherwise.
]]

-- version number of this file (automatically generated by SVN)
local VERSION = ("$Revision: 1820301 $"):match("(%d+)")

--- Return the SVN revision number of this script (as a string).
function version()
    return VERSION
end

local ARCHIVE_AWARE = false -- default: do archive.a.o lookup with HEAD requests
local CACHE_TIMEOUT = 1800  -- should be 0 in test ; 1800 in production
local LOG_LOOKUPS = 0       -- should be 1 in test ; 0 in production

local JSON = require 'JSON'
local ezt = require 'libezt'

-- Temporary hack: allow for missing module.
-- The stubs keep the script running when LuaSocket is not installed;
-- reqHTTP / reqSOCK record whether the real modules were loaded.
local HTTP = {request=function() end}
local reqHTTP = pcall(function() HTTP = require 'socket.http'; HTTP.TIMEOUT = 1; end)
local SOCK = {gettime=function() return 0 end}
local reqSOCK = pcall(function() SOCK = require 'socket' ; end)
local posix = require 'posix'

local mirror_file = "/x1/www/www.apache.org/mirrors/mirrors.list"
local MAXAGE = 24*3600 -- max mirror age

local ATTIC_URI = 'http://attic.apache.org/projects/'
local ATTIC_DIR = '/var/www/attic.apache.org/projects/'
local DIST_DIR = '/var/www/www.apache.org/content/dist/'
local ARCH_URI = 'http://archive.apache.org/dist/'
local DYN_DIR = '/var/www/www.apache.org/dyn/'
local CLOSER_PG = DYN_DIR .. 'closer.html'
local ARCHIVE_PG = DYN_DIR .. 'archive.html'
local STATS_DIR = DYN_DIR .. 'stats/'
local LOOKUP_LOG = STATS_DIR .. 'AAAA'

local cache_hit = nil                 -- whether the last is_in_arch() call was served from cache
local cache_in_arch = {}              -- cache of archive lookups
local mirror_stamp = 0                -- when mirror_file was last processed
local mirror_map = {}                 -- map of all recent mirrors. [cc|backup][ftp|http|rsync] = list of urls
local mirror_templates = {}           -- cache of unprocessed mirror templates
local mirror_templates_generated = {} -- cache of generated templates
local mymap                           -- map of mirrors for the current request (based on the country code)

--- (Re)load the mirror list from mirror_file into mirror_map.
-- Mirrors whose timestamp is older than MAXAGE (relative to the list's own
-- "# date" header, or to now when the header is absent) are skipped, except
-- for 'backup' entries which are always kept.
-- If the file cannot be opened the previously loaded map is kept and
-- mirror_stamp is left untouched so the next request retries.
-- @return the (possibly refreshed) mirror_map
function get_mirrors()
    local now = os.time()
    local atleast = now - MAXAGE

    local f = io.open(mirror_file, "r")
    if not f then
        return mirror_map
    end
    local mirrord = f:read("*a") or ""
    f:close()

    -- Check the age of the mirrors relative to the mirror list, rather than now. (As was done by mirrors.cgi)
    -- This allows the system to still work even if the list is a bit stale
    -- LUA does not have a standard API to get a file date
    -- However, the timestamp when the information was collected is more useful anyway
    -- Parse the file header: # date : Wed Sep 2 09:49:53 2015 [UTC]
    local mon, day, hh, mm, ss, yy =
        mirrord:match("# date : %w+ (%w+) +(%d+) (%d%d):(%d%d):(%d%d) (%d%d%d%d) %[UTC%]")
    local MON = {Jan=1,Feb=2,Mar=3,Apr=4,May=5,Jun=6,Jul=7,Aug=8,Sep=9,Oct=10,Nov=11,Dec=12}
    if mon and MON[mon] then
        -- use isdst = false as the timestamp is UTC
        local filetime = os.time({
            year = tonumber(yy), month = MON[mon], day = tonumber(day),
            hour = tonumber(hh), min = tonumber(mm), sec = tonumber(ss),
            isdst = false,
        })
        if filetime then
            atleast = filetime - MAXAGE
        end
    end

    mirror_map = {}
    -- each data line is: <type> <country> <url> <timestamp>
    for t, c, url, timestamp in mirrord:gmatch("([a-zA-Z]+)%s+([a-zA-Z]+)%s+(%S+)%s+(%d+)\r?\n") do
        c = c:lower()
        -- Don't check the timestamp for backup mirrors
        if c == 'backup' or tonumber(timestamp) >= atleast then
            mirror_map[c] = mirror_map[c] or {}
            mirror_map[c][t] = mirror_map[c][t] or {}
            --url = url:gsub("/$", "")
            table.insert(mirror_map[c][t], url)
        end
    end
    mirror_stamp = now
    return mirror_map
end

--- Append one line describing an archive lookup to LOOKUP_LOG (best effort).
-- @param inarch result of is_in_arch() for path
-- @param path   the path that was looked up
function log_lookup(inarch,path)
    local f = io.open(LOOKUP_LOG,'a')
    if f then
        f:write(os.date('%Y-%m-%d/%H:%M:%S')
            .. " [" .. ( posix.getpid().pid or 'pid' ) .. ']'
            .. " look=" .. tostring(inarch)
            .. " hit=" .. tostring(cache_hit)
            .. ' ' .. path
            .. "\n" )
        f:close()
    end
end

--- Milliseconds elapsed since t (a SOCK.gettime() value).
function interval(t)
    return 1000 * ( SOCK.gettime() - t )
end

--- interval(t) formatted for the action=info output.
function elapsed(t)
    return string.format("%.3f ms'\n",interval(t))
end

--- True if posix.stat() succeeds on file.
function file_exists(file)
    return posix.stat(file) ~= nil
end

--- True if ATTIC_DIR contains <proj>.html
function is_in_attic(proj)
    return file_exists(ATTIC_DIR .. proj .. '.html')
end

--- Filesystem location of path below DIST_DIR.
function dist_path(path)
    return DIST_DIR .. path
end

--- True if path exists below DIST_DIR.
function is_in_dist(path)
    return file_exists(dist_path(path))
end

--- URI of path on the archive server.
function arch_uri(path)
    return ARCH_URI .. path
end

--- Text used in the archive page to refer to path on the archive server.
function archive_url(path)
    local uri = arch_uri(path)
    -- NOTE(review): the two empty literals look like the remains of markup
    -- (presumably an anchor around the uri) that was stripped -- confirm
    -- against the upstream file.
    return '' .. uri .. ''
end

--- Look up path on the archive server with a HEAD request, with caching.
-- Positive results are cached forever; other results for CACHE_TIMEOUT seconds.
-- Sets the module-level cache_hit as a side effect.
-- @return true if the HEAD request returned 200, false for any other status,
--         nil if the request itself failed (or socket.http is unavailable)
function is_in_arch(path)
    local entry = cache_in_arch[path]
    if not entry then
        cache_hit = false
    elseif entry.result then
        cache_hit = true
    else
        cache_hit = ( os.time() - entry.timestamp ) < CACHE_TIMEOUT
    end
    if not cache_hit then
        local ok, code = HTTP.request { method = "HEAD", url = arch_uri(path) }
        entry = { timestamp = os.time(), result = ( ok and code == 200 ) }
        cache_in_arch[path] = entry
    end
    return entry.result
end

--- Return the raw template stored at url (a file path), cached for 2*CACHE_TIMEOUT.
-- @return table { data = <file contents or "No such page">, timestamp = <load time> }
function get_page(url)
    local cached = mirror_templates[url]
    if not cached or cached.timestamp < (os.time() - 2*CACHE_TIMEOUT) then
        local f = io.open(url, "r")
        mirror_templates[url] = {
            data = f and f:read("*a") or "No such page",
            timestamp = os.time()
        }
        if f then
            f:close()
        end
    end
    return mirror_templates[url]
end

--- Return the rendered template, cached for CACHE_TIMEOUT seconds.
-- The cache key covers the page, the preferred mirror, the path_info and the
-- country code, because the rendered output contains per-country mirror lists.
-- @return table { data = <rendered text>, timestamp = <render time> }
function get_output_cached(page, defs, r, ezt_defs)
    local pref = defs.preferred or ""
    local path_info = defs.path_info or ""
    local cca2 = defs.cca2 or ""
    local cacheKey = page .. ":" .. pref .. ":" .. path_info .. ":" .. cca2
    local cached = mirror_templates_generated[cacheKey]
    if not cached or cached.timestamp < (os.time() - CACHE_TIMEOUT) then
        local template = get_page(page)
        local tdata = recurse(defs, template.data, r, ezt_defs)
        mirror_templates_generated[cacheKey] = {
            data = tdata,
            timestamp = os.time()
        }
    end
    return mirror_templates_generated[cacheKey]
end

--- Expand includes in tdata and render it through EZT.
-- @return the rendered text, or the EZT import error message
function recurse(defs, tdata, r, ezt_defs)
    -- SSI emulation
    -- NOTE(review): the pattern below is empty in this copy of the file; it
    -- looks like the include-directive pattern was lost (it has to capture
    -- the included file name as `inc`). Restore it from the upstream source.
    tdata = tdata:gsub("",
        function(inc)
            local filepath = (defs.filepath .. inc):gsub("[/]+", "/")
            if r:stat(filepath) then
                local f = io.open(filepath, "r")
                if not f then
                    -- stat succeeded but the file is not readable
                    return ""
                end
                local d = f:read("*a")
                f:close()
                return d
            else
                return ""
            end
        end
    )

    -- Parse EZT
    local structure, err = ezt:import("[ezt]"..tdata.."[end]")

    -- Render output
    if structure then
        return ezt:construct(structure, ezt_defs)
    else
        return err
    end
end

-- true if the string (s) ends with (e)
function endsWith(s, e)
    return e == s:sub(-e:len())
end

-- true if the string (s) begins with (b)
function beginsWith(s, b)
    return b == s:sub(1, b:len())
end

-- return nil if string is empty (or nil), otherwise the string itself
function nonEmpty(s)
    if s == nil or s == '' then
        return nil
    end
    return s
end

-- Temporary fix to extract the missing path_info for dyn/closer.cgi redirects only
-- The result keeps the leading "/" (the caller trims it).
function get_path_info(s)
    local CGI_SCRIPT = "/dyn/closer.cgi/" -- original CGI script name
    if beginsWith(s, CGI_SCRIPT) then
        return s:sub(CGI_SCRIPT:len()) -- keep just the suffix
    else
        return nil
    end
end

-- The request parameter has the data structures and functions as described here:
-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#datastructures
-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#functions
--- mod_lua entry point: pick mirrors for the client and render the response.
-- @param r the mod_lua request object
-- @return apache2.OK
function handle(r)
    local get = r:parseargs()

    -- Per-request flag: do not modify the module-level default, otherwise a
    -- single request would switch archive lookups on for every later request
    -- served by the same Lua state.
    local archive_aware = ARCHIVE_AWARE
    if get.archive_aware and get.archive_aware ~= '0' then
        archive_aware = true
    end

    local now = os.time()
    if mirror_stamp < (now - 3600) then
        get_mirrors()
    end

    local country = r.notes['GEOIP_COUNTRY_NAME'] or r.subprocess_env['GEOIP_COUNTRY_NAME'] or "Unknown"
    local cca2 = (get.cca2
        or r.notes['GEOIP_COUNTRY_CODE']
        or r.subprocess_env['GEOIP_COUNTRY_CODE']
        or 'Backup'):lower()
    if cca2 == 'gb' then
        cca2 = 'uk'
    end
    local occa2 = cca2 -- the country code before any fallback to 'backup'
    if not mirror_map[cca2] then
        cca2 = 'backup'
    end
    -- tolerate a mirror list without any backup entry (or no list at all)
    local bmap = mirror_map['backup'] or {}
    mymap = mirror_map[cca2] or bmap
    mymap['backup'] = bmap['http']

    -- pick a random mirror per protocol, falling back to the backup mirrors
    local URL = {}
    for _, t in pairs({'http','ftp'}) do
        URL[t] = (mymap[t] and mymap[t][math.random(1, #mymap[t])])
            or (bmap[t] and bmap[t][math.random(1, #bmap[t])])
    end

    local page = r.filename
    local got_f = get.f -- work on a copy of the parameter
    if got_f then
        local hname = (r.hostname or ""):gsub("www%.", "")
        -- escape pattern magic characters ('.', '-') so the host name is matched literally
        local hname_pat = hname:gsub("%p", "%%%0")
        got_f = got_f:gsub("^/var/www/html/", "/var/www/")
        got_f = got_f:gsub(hname_pat, ""):gsub("/var/www//var/www/", "/var/www/")
        -- the extra parentheses drop gsub's match count so it is not passed to r:stat
        if r:stat(got_f) or r:stat((got_f:gsub("%.cgi", ".html"))) then
            page = got_f
        else
            page = got_f:gsub("/www/", "/www/" .. hname:gsub("%.[a-z][a-z]%.", ".") .. "/"):gsub("[/]+", "/")
        end
    end
    page = page:gsub("%.cgi", ".html"):gsub("%.lua", ".html")
    -- Only serve existing templates below the web roots. Reject any ".." so a
    -- user-supplied `f` cannot climb out of them (e.g. /var/www/../../etc/...).
    if not r:stat(page)
        or page:find("..", 1, true)
        or not (page:match("^/var/www/") or page:match("^/x1/www/")) then
        page = CLOSER_PG
    end

    local defs = {}
    local ezt_defs = { strings = {}, arrays = {} }
    defs.filepath = (page:gsub("[^/]+$", ""))
    defs.debug = get.debug and true or false
    defs.preferred = r:escape_html(get.preferred or get.Preferred or URL['http'] or "")
    defs.path_info = (r:escape_html(get.path -- command-line override
        or nonEmpty(r.path_info)             -- if path provided by server
        or get_path_info(r.uri)              -- temporary fix to extract it from r.uri for dyn/closer.cgi calls
        -- Disable for now; it was previously effectively disabled because r.path_info was never false
        -- or r.unparsed_uri:gsub("^.+%.cgi/*", ""):gsub("^.+%.lua/*", "") -- not sure what this is trying to do
        -- TODO in any case seems wrong to use the unparsed URI as that will include the query string
        or "/")                              -- default
        :gsub("^/","",1))                    -- trim leading "/" as per Python version
    defs.country = country
    defs.cca2 = cca2

    -- proj is the first path component of defs.path_info
    local proj = defs.path_info
    if proj and proj:find('/') then
        proj = proj:sub(1,proj:find('/')-1)
    end
    defs.project = proj

    -- defs is shared (not copied): later changes to defs are visible to the template
    ezt_defs.strings = defs
    ezt_defs.arrays = {
        http = mymap['http'] or bmap['http'],
        ftp = mymap['ftp'] or bmap['ftp'],
        backup = bmap['http'],
    }

    -- Check that preferred http/ftp exists, otherwise default to none
    local prefIsOkay = false
    for _,b in ipairs({'http', 'ftp', 'backup'}) do
        for _, v in pairs(ezt_defs.arrays[b] or {}) do -- arrays[b] may not exist
            if r:escape_html(v) == defs.preferred then
                prefIsOkay = true
                break
            end
        end
        if prefIsOkay then
            break
        end
    end
    if not prefIsOkay then
        ezt_defs.preferred = ""
        defs.preferred = URL['http'] or "" -- never nil: it is concatenated and printed below
    end

    -- string only repr of preferred URL
    if get.preferred and get.preferred == "true" then
        r.content_type = "text/plain"
        r:puts(defs.preferred)
        return apache2.OK
    end

    local do_json = false
    if (get.as_json and not (get.as_json == "0")) or (get.asjson and not (get.asjson == "0")) then
        do_json = true
    end

    if get.action then
        if get.action == 'download' and get.filename then
            r.headers_out['Location'] = defs.preferred .. get.filename
            r.status = 302
            return apache2.OK
        elseif get.action == 'info' then
            r.content_type = "text/plain"
            r:puts(string.format("%s\ncloser revision: %s\nlibezt revision: %s\n",
                _VERSION,       -- LUA
                version(),      -- closer
                ezt:version())) -- libezt
            -- Show any arguments
            for k, v in pairs( get ) do
                r:puts( string.format("arg %s: %s\n", k, v) )
            end
            local t0 = SOCK.gettime()
            local URI = r.subprocess_env['SCRIPT_URI'] or "nil"
            -- Request parameters
            r:puts("r.hostname: '",r.hostname or "nil", "'\n")
            r:puts("r.document_root:'",r.document_root or "nil", "'\n")
            r:puts("r.uri: '",r.uri or "nil", "'\n")
            -- r:puts("r.the_request: '",r.the_request or "nil", "'\n")
            -- r:puts("r.unparsed_uri: '",r.unparsed_uri or "nil", "'\n")
            r:puts("r.path_info: '",r.path_info or "nil","'\n")
            r:puts("env[SCRIPT_URI] '",URI,"'\n")
            r:puts("require HTTP : '",tostring(reqHTTP),"'\n")
            r:puts("require SOCK : '",tostring(reqSOCK),"'\n")
            -- calculated values
            r:puts("defs.path_info: '",defs.path_info or "nil","'\n")
            r:puts("defs.filepath: '",defs.filepath or "nil","'\n")
            r:puts("proj : '",proj,"'\n")
            r:puts("proj in attic: '",tostring(is_in_attic(proj)),"'\n")
            local in_dist = tostring(is_in_dist(defs.path_info))
            r:puts("dist/path : '",dist_path(defs.path_info),"'\n")
            r:puts("path in dist?: '",in_dist,"'\n")
            r:puts("elapsed : '",elapsed(t0))
            r:puts("archive aware: '",tostring(archive_aware),"'\n")
            if in_dist == 'false' then
                r:puts("archive uri : '",arch_uri(defs.path_info),"'\n")
            end
            if archive_aware then
                r:puts("... archive lookup ...\n")
                r:puts("process PID : '",tostring(posix.getpid().pid),"'\n")
                local in_arch = tostring(is_in_arch(defs.path_info))
                r:puts("archive uri : '",arch_uri(defs.path_info),"'\n")
                r:puts("path in arch?: '",in_arch,"'\n")
                r:puts("arch cachehit: '",tostring(cache_hit),"'\n")
                r:puts("elapsed : '",elapsed(t0))
            end
            return apache2.OK
        elseif get.action == 'catlog' then
            r.content_type = "text/plain"
            local f = io.open(LOOKUP_LOG)
            if f then
                while true do
                    local line = f:read()
                    if line == nil then break end
                    r:puts(line,"\n")
                end
                f:close()
            else
                r:puts("can't open " .. LOOKUP_LOG .. "\n")
            end
            return apache2.OK
        else
            r.content_type = "text/plain"
            r:puts("unknow action [" .. get.action .. "]\n")
            return apache2.OK
        end
    end

    if do_json then
        r.content_type = "application/json"
        r:puts(JSON:encode_pretty({
            path_info = defs.path_info,
            preferred = defs.preferred,
            http = mymap['http'] or bmap['http'],
            ftp = mymap['ftp'] or bmap['ftp'],
            backup = bmap['http'],
            in_dist = is_in_dist(defs.path_info),
            in_attic = is_in_attic(proj),
            cca2 = occa2
        }))
        return apache2.OK
    end

    if is_in_attic(proj) then
        r.headers_out['Location'] = ATTIC_URI .. proj .. ".html"
        r.status = 302
        return apache2.OK
    end

    if not is_in_dist(defs.path_info) then
        local arch_home = archive_url('')
        local arch_path = archive_url(defs.path_info)
        local lookup = ''
        if archive_aware then
            local inarch = is_in_arch(defs.path_info)
            if inarch == nil then
                -- nil means the HEAD request itself did not complete
                if reqHTTP then
                    lookup = 'A lookup on ' .. arch_home .. ' failed.'
                else
                    lookup = "Can't do lookups on " .. arch_home
                end
                -- NOTE(review): this copy of the file has a raw line break
                -- inside the literal; it presumably was a markup line break
                -- in the upstream source -- confirm.
                lookup = lookup .. "\nTry " .. arch_path
            elseif inarch then
                lookup = 'The object is in our archive : ' .. arch_path
            else
                lookup = 'The object is in not in our archive ' .. arch_home
            end
            -- compare explicitly: the number 0 is truthy in Lua
            if LOG_LOOKUPS ~= 0 then
                log_lookup(inarch,defs.path_info)
            end
        else -- not archive aware
            lookup = "It may be in our archive : " .. arch_path
        end
        defs.lookup = lookup
        page = ARCHIVE_PG
    end

    -- per-project request statistics (best effort)
    local rootpath = defs.path_info:match("^([-a-z0-9]+)/")
    if rootpath and rootpath == "incubator" then
        rootpath = defs.path_info:match("^incubator/([-a-z0-9]+)/")
    end
    if rootpath then
        local f = io.open(STATS_DIR .. rootpath .. ".log", "a")
        if f then
            -- get a bit of the IP to identify multiple unique request with same TS/CCA2
            local ip = r.useragent_ip or ""
            local ipbit = ip:match("([a-f0-9]+):?:?$") or ip:match("^([a-f0-9]+)") or "000"
            f:write(os.time() .. " " .. ipbit .. " " .. occa2 .. " " .. defs.path_info .. "\n")
            f:close()
        end
    end

    local tdata = get_output_cached(page, defs, r, ezt_defs)

    -- check for special content-type based on file name
    if endsWith(page,"--xml.html") then
        r.content_type = "text/xml"
    else
        r.content_type = "text/html"
    end
    r:puts(tdata.data)
    if r.hostname == 'www.apache.org' then
        -- NOTE(review): empty output; presumably a markup snippet was lost
        -- from this literal -- confirm against the upstream file.
        r:puts("")
    end
    return apache2.OK
end