");

/**
 * The ProxyHandler class does the actual proxying work. It deals with both
 * GET and POST based input, and performs a request based on the input, headers
 * and httpmethod params. It also deals with request signing and verification
 * through the authz and st (security token) params.
 */
class ProxyHandler
{
    private $context;
    private $signingFetcher;
    private $oauthFetcher;

    /**
     * Factory stored by setContentFetcher(). It used to be created as a dynamic
     * (and therefore public) property, so it stays public for compatibility.
     */
    public $contentFetcherFactory;

    /**
     * @param object $context the gadget context; used here for token
     *        extraction, the http fetcher and the refresh interval
     * @param object|null $signingFetcher factory used for authz=SIGNED and authz=OAUTH requests
     * @param object|null $oauthFetcher fetcher whose response metadata is reported
     *        when an OAuth fetch yields no result
     */
    public function __construct($context, $signingFetcher = null, $oauthFetcher = null)
    {
        $this->context = $context;
        $this->signingFetcher = $signingFetcher;
        $this->oauthFetcher = $oauthFetcher;
    }

    /**
     * Fetches content and echoes it in JSON format (prefixed with
     * UNPARSEABLE_CRUFT), then terminates the script. When the request carries
     * contentType=FEED the body is first converted into a JSON feed structure.
     * Any remote status other than 200 is reported to the client as a 404.
     *
     * @param string $url the url to fetch
     * @param GadgetSigner $signer the request signer to use
     * @param string $method the http method to use (get or post) in making the request
     */
    public function fetchJson($url, $signer, $method)
    {
        try {
            $this->context->extractAndValidateToken($signer);
        } catch (Exception $e) {
            // no token given, safe to ignore
        }
        $url = $this->validateUrl($url);
        // Fetch the content and convert it into JSON.
        // TODO: Fetcher needs to handle variety of HTTP methods.
        $result = $this->fetchContentDivert($url, $method, $signer);
        if (! isset($result)) {
            // OAuthFetcher only: no content came back, report the fetcher's metadata instead
            $metadata = $this->oauthFetcher->getResponseMetadata();
            $json = json_encode(array($url => $metadata));
            $this->setCachingHeaders();
            header("Content-Type: application/json; charset=utf-8", true);
            echo UNPARSEABLE_CRUFT . $json;
            die();
        }
        $status = (int)$result->getHttpCode();
        if ($status == 200) {
            if (isset($_REQUEST['contentType']) && $_REQUEST['contentType'] == 'FEED') {
                $resp = $this->buildFeedResponse($url, $result);
            } else {
                $resp = $result->getResponseContent();
            }
            $json = json_encode(array($url => array('body' => $resp, 'rc' => $status)));
            $this->setCachingHeaders();
            // NOTE: no Content-Type header is sent on this path (unchanged behaviour).
            echo UNPARSEABLE_CRUFT . $json;
        } else {
            if (ob_get_level() > 0) {
                ob_end_clean();
            }
            header("HTTP/1.0 404 Not Found", true);
            echo "\n\n404 - Not Found\n\n";
        }
        die();
    }

    /**
     * Converts an already fetched feed document into the JSON string that is
     * returned as the 'body' of a FEED request.
     *
     * Reads numEntries (defaults to 3 when absent) and getSummaries from
     * $_REQUEST. The caller has already verified that the response status is 200.
     *
     * @param string $url the url the feed was fetched from
     * @param object $result the fetched response (must offer getResponseContent())
     * @return string JSON encoded feed data, or an error message when parsing fails
     * @throws Exception when the document is neither an RSS nor an Atom feed
     */
    private function buildFeedResponse($url, $result)
    {
        require_once 'external/Zend/Feed.php';
        $numEntries = isset($_REQUEST['numEntries']) ? (int)$_REQUEST['numEntries'] : 3;
        $getSummaries = ! empty($_REQUEST['getSummaries']) && $_REQUEST['getSummaries'] != 'false';
        $content = $result->getResponseContent();
        try {
            $feed = Zend_Feed::importString($content);
            if ($feed instanceof Zend_Feed_Rss) {
                $channel = $this->convertRssFeed($feed, $url, $numEntries, $getSummaries);
            } elseif ($feed instanceof Zend_Feed_Atom) {
                $channel = $this->convertAtomFeed($feed, $url, $numEntries, $getSummaries);
            } else {
                throw new Exception('Invalid feed type');
            }
            return json_encode($channel);
        } catch (Zend_Feed_Exception $e) {
            return 'Error parsing feed: ' . $e->getMessage();
        }
    }

    /**
     * Builds the channel array (Entry, Title, URL, Description, Link, Author)
     * for an RSS feed.
     *
     * @param Zend_Feed_Rss $feed the parsed feed
     * @param string $url the feed url
     * @param int $numEntries maximum number of entries to include
     * @param bool $getSummaries whether item descriptions are included as 'Summary'
     * @return array
     */
    private function convertRssFeed($feed, $url, $numEntries, $getSummaries)
    {
        // Try to get the author from the feed itself first
        if ($feed->author()) {
            $author = $feed->author();
        } elseif ($feed->creator()) {
            $author = $feed->creator();
        } else {
            $author = null;
        }

        // Loop over each channel item and store relevant data
        $entries = array();
        foreach ($feed as $item) {
            if (count($entries) >= $numEntries) {
                break;
            }
            $entry = array();
            $entry['Title'] = $item->title();
            $entry['Link'] = $item->link();
            if ($getSummaries && $item->description()) {
                $entry['Summary'] = $item->description();
            }
            $date = 0;
            if ($item->date()) {
                $date = strtotime($item->date());
            } elseif ($item->pubDate()) {
                $date = strtotime($item->pubDate());
            }
            $entry['Date'] = $date;
            $entries[] = $entry;

            // Remember the first author found; never overwrite one we already have
            if (empty($author)) {
                if ($item->author()) {
                    $author = $item->author();
                } elseif ($item->creator()) {
                    $author = $item->creator();
                }
            }
        }

        $channel = array();
        $channel['Entry'] = $entries;
        $channel['Title'] = $feed->title();
        $channel['URL'] = $url;
        $channel['Description'] = $feed->description();
        $link = $feed->link();
        if ($link) {
            if (is_array($link)) {
                // several link nodes: the last one with a value wins
                foreach ($link as $node) {
                    if ($node->nodeValue) {
                        $channel['Link'] = $node->nodeValue;
                    }
                }
            } else {
                $channel['Link'] = $link;
            }
        }
        if ($author != null) {
            $channel['Author'] = $author;
        }
        return $channel;
    }

    /**
     * Builds the channel array (Entry, Title, URL, Description, Link, Author)
     * for an Atom feed.
     *
     * @param Zend_Feed_Atom $feed the parsed feed
     * @param string $url the feed url
     * @param int $numEntries maximum number of entries to include
     * @param bool $getSummaries whether entry summaries are included as 'Summary'
     * @return array
     */
    private function convertAtomFeed($feed, $url, $numEntries, $getSummaries)
    {
        $author = $this->atomAuthor($feed);

        // Loop over each entry and store relevant data
        $entries = array();
        foreach ($feed as $atomEntry) {
            if (count($entries) >= $numEntries) {
                break;
            }
            $entry = array();
            $entry['Title'] = $atomEntry->title();
            $entry = $this->withAtomLink($entry, $atomEntry);
            if ($getSummaries && $atomEntry->summary()) {
                $entry['Summary'] = $atomEntry->summary();
            }
            $date = 0;
            if ($atomEntry->updated()) {
                $date = strtotime($atomEntry->updated());
            } elseif ($atomEntry->published()) {
                $date = strtotime($atomEntry->published());
            }
            $entry['Date'] = $date;
            $entries[] = $entry;

            // Remember the first author found
            if (empty($author)) {
                $author = $this->atomAuthor($atomEntry);
            }
        }

        $channel = array();
        $channel['Entry'] = $entries;
        $channel['Title'] = $feed->title();
        $channel['URL'] = $url;
        $channel['Description'] = $feed->subtitle();
        $channel = $this->withAtomLink($channel, $feed);
        if (! empty($author)) {
            $channel['Author'] = $author;
        }
        return $channel;
    }

    /**
     * Returns the author of an Atom feed or entry: the name when present,
     * otherwise the email, otherwise the raw author value; null when the
     * element has no author.
     *
     * @param object $element an Atom feed or entry
     * @return mixed
     */
    private function atomAuthor($element)
    {
        if (! $element->author()) {
            return null;
        }
        if ($element->author->name()) {
            return $element->author->name();
        }
        if ($element->author->email()) {
            return $element->author->email();
        }
        return $element->author();
    }

    /**
     * Returns $target with a 'Link' key taken from the Atom element: the
     * rel="alternate" link when there is one, otherwise the first link
     * without a "rel" attribute. When several links exist and all carry a
     * rel attribute, no 'Link' key is added.
     *
     * @param array $target the array to extend
     * @param object $element an Atom feed or entry
     * @return array
     */
    private function withAtomLink($target, $element)
    {
        if ($element->link('alternate')) {
            $target['Link'] = $element->link('alternate');
            return $target;
        }
        $links = $element->link;
        if (is_array($links)) {
            foreach ($links as $link) {
                if (empty($link['rel'])) {
                    $target['Link'] = $link['href'];
                    break;
                }
            }
        } else {
            $target['Link'] = $links['href'];
        }
        return $target;
    }

    /**
     * Fetches the content and returns it as-is using the headers as returned
     * by the remote host (minus the ones that would conflict with our own
     * output), honouring conditional request headers, then terminates the
     * script. Any remote status other than 200 is reported as a 404.
     *
     * @param string $url the url to retrieve
     * @param GadgetSigner $signer the GadgetSigner to use (not used by this method)
     * @param string $method either get or post
     */
    public function fetch($url, $signer, $method)
    {
        $url = $this->validateUrl($url);
        // TODO: Fetcher needs to handle variety of HTTP methods.
        $result = $this->fetchContent($url, $method);
        $status = (int)$result->getHttpCode();
        if ($status == 200) {
            $isShockwaveFlash = $this->relayResponseHeaders($result->getResponseHeaders());
            if (! $isShockwaveFlash) {
                header('Content-Disposition: attachment;filename=p.txt');
            }
            $etag = md5($result->getResponseContent());
            // Use the response's Last-Modified, otherwise fall back on our internal
            // time keeping (the time the request was created)
            $lastModified = $result->getResponseHeader('Last-Modified');
            if ($lastModified == null) {
                $lastModified = gmdate('D, d M Y H:i:s', $result->getCreated()) . ' GMT';
            }

            // If Pragma or Cache-Control contains no-cache, the browser wants a 'forced reload'
            $pragmaNoCache = isset($_SERVER['HTTP_PRAGMA'])
                && strpos(strtolower($_SERVER['HTTP_PRAGMA']), 'no-cache') !== false;
            $cacheControlNoCache = isset($_SERVER['HTTP_CACHE_CONTROL'])
                && strpos(strtolower($_SERVER['HTTP_CACHE_CONTROL']), 'no-cache') !== false;

            $notModified = false;
            if (! $pragmaNoCache && ! $cacheControlNoCache) {
                if (isset($_SERVER['HTTP_IF_NONE_MATCH']) && $_SERVER['HTTP_IF_NONE_MATCH'] == $etag) {
                    // if the e-tags match, set not modified; no need to check if-modified-since
                    $notModified = true;
                } elseif (isset($_SERVER['HTTP_IF_MODIFIED_SINCE']) && $lastModified && ! isset($_SERVER['HTTP_IF_NONE_MATCH'])) {
                    // Compare as timestamps, but keep $lastModified itself as the
                    // HTTP date string that goes into the Last-Modified header.
                    $ifModifiedSince = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']);
                    $lastModifiedTime = strtotime($lastModified);
                    if ($ifModifiedSince !== false && $lastModifiedTime !== false && $lastModifiedTime <= $ifModifiedSince) {
                        $notModified = true;
                    }
                }
            }
            $this->setCachingHeaders($etag, $this->context->getRefreshInterval(), $lastModified);
            if ($notModified) {
                header('HTTP/1.0 304 Not Modified', true);
                header('Content-Length: 0', true);
            } else {
                // then echo the content
                echo $result->getResponseContent();
            }
        } else {
            if (ob_get_level() > 0) {
                ob_end_clean();
            }
            header("HTTP/1.0 404 Not Found", true);
            echo "\n\n404 - Not Found ($status)\n\n";
            echo "";
        }
        // make sure the HttpServlet destructor doesn't override ours
        die();
    }

    /**
     * Re-emits the remote response headers, skipping the ones that would
     * otherwise mess up our own output (Transfer-Encoding, Cache-Control,
     * Expires, Content-Length and ETag).
     *
     * @param string $rawHeaders newline separated "Name: value" header lines
     * @return bool true when the content type is application/x-shockwave-flash
     */
    private function relayResponseHeaders($rawHeaders)
    {
        $skipped = array('transfer-encoding', 'cache-control', 'expires', 'content-length', 'etag');
        $isShockwaveFlash = false;
        foreach (explode("\n", $rawHeaders) as $header) {
            $colon = strpos($header, ':');
            if ($colon === false || $colon === 0) {
                // not a "Name: value" line (e.g. the status line)
                continue;
            }
            $key = trim(substr($header, 0, $colon));
            $val = trim(substr($header, $colon + 1));
            if (! in_array(strtolower($key), $skipped, true)) {
                header("$key: $val");
            }
            // Header names and media types are case-insensitive
            if (strcasecmp($key, 'Content-Type') == 0 && strcasecmp($val, 'application/x-shockwave-flash') == 0) {
                // We're skipping the content disposition header for flash due to an issue with Flash player 10
                // This does make some sites a higher value phishing target, but this can be mitigated by
                // additional referer checks.
                $isShockwaveFlash = true;
            }
        }
        return $isShockwaveFlash;
    }

    /**
     * Both fetch and fetchJson call this function to retrieve the actual content.
     * The incoming request headers are forwarded, except Keep-Alive, Connection,
     * Host, Accept and Accept-Encoding. For POST requests the postData parameter
     * (from $_GET, falling back to $_POST) is sent as the request body.
     *
     * @param string $url the url to fetch
     * @param string $method either get or post
     * @return the filled in request (RemoteContentRequest)
     * @throws Exception when the url has no protocol or one other than http, https or ftp
     */
    private function fetchContent($url, $method)
    {
        //TODO get actual character encoding from the request

        // Check the protocol requested - curl doesn't really support file://
        // requests but the 'error' should be handled properly
        $protocolSplit = explode('://', $url, 2);
        if (count($protocolSplit) < 2) {
            throw new Exception("Invalid protocol specified");
        }
        $protocol = strtoupper($protocolSplit[0]);
        if ($protocol != "HTTP" && $protocol != "HTTPS" && $protocol != "FTP") {
            throw new Exception("Invalid protocol specified in url ($protocol)");
        }

        // Extract the request headers from the $_SERVER super-global (this -does- unfortunately mean that any
        // header that php doesn't understand won't be proxied through). If this turns out to be a problem we could
        // add support for HTTP_RAW_HEADERS, but this depends on a php.ini setting, so I'd rather not require that.
        $notForwarded = array('keep-alive', 'connection', 'host', 'accept', 'accept-encoding');
        $headers = '';
        $context = new GadgetContext('GADGET');
        foreach ($this->request_headers() as $key => $val) {
            if (! in_array(strtolower($key), $notForwarded, true)) {
                // proper curl header format according to http://www.php.net/manual/en/function.curl-setopt.php#80099
                $headers .= "$key: $val\n";
            }
        }

        if ($method == 'POST') {
            $data = isset($_GET['postData']) ? $_GET['postData'] : false;
            if (! $data) {
                $data = isset($_POST['postData']) ? $_POST['postData'] : false;
            }
            // even if postData is an empty string, it will still post (since RemoteContentRequest checks if
            // it's false), so the request to POST is still honored
            $request = new RemoteContentRequest($url, $headers, $this->buildPostData($data));
        } else {
            $request = new RemoteContentRequest($url, $headers);
        }
        return $this->context->getHttpFetcher()->fetch($request, $context);
    }

    /**
     * Normalises a raw "a=b&c=d" post body: every name=value pair is decoded
     * and re-encoded; entries without a '=' are dropped.
     *
     * @param string|false $data the raw postData parameter
     * @return string the re-encoded body ('' when there is nothing to send)
     */
    private function buildPostData($data)
    {
        if (! $data) {
            return '';
        }
        $pairs = array();
        foreach (explode('&', $data) as $entry) {
            // Split before decoding, so an encoded '=' inside a value survives
            $parts = explode('=', $entry, 2);
            if (count($parts) == 2) {
                $pairs[] = urlencode(urldecode($parts[0])) . '=' . urlencode(urldecode($parts[1]));
            }
        }
        return implode('&', $pairs);
    }

    /**
     * Picks the fetch strategy based on the authz request parameter
     * (SIGNED, OAUTH, or anything else for a plain fetch).
     *
     * @param string $url the url to fetch
     * @param string $method either get or post
     * @param GadgetSigner $signer the signer used to extract and validate the security token
     * @return mixed the fetch result of the selected fetcher
     */
    private function fetchContentDivert($url, $method, $signer)
    {
        if (isset($_GET['authz'])) {
            $authz = $_GET['authz'];
        } elseif (isset($_POST['authz'])) {
            $authz = $_POST['authz'];
        } else {
            $authz = '';
        }
        // NOTE(review): the token is validated here for every authz mode, so an
        // invalid or missing token makes this call throw even for plain fetches.
        $token = $this->context->extractAndValidateToken($signer);
        switch (strtoupper($authz)) {
            case 'SIGNED':
                $fetcher = $this->signingFetcher->getSigningFetcher(new BasicRemoteContentFetcher(), $token);
                return $fetcher->fetch($url, $method);
            case 'OAUTH':
                $params = new OAuthRequestParams();
                $fetcher = $this->signingFetcher->getSigningFetcher(new BasicRemoteContentFetcher(), $token);
                $oAuthFetcherFactory = new OAuthFetcherFactory($fetcher);
                $this->oauthFetcher = $oAuthFetcherFactory->getOAuthFetcher($fetcher, $token, $params);
                $request = new RemoteContentRequest($url);
                $request->createRemoteContentRequestWithUri($url);
                return $this->oauthFetcher->fetch($request);
            case 'NONE':
            default:
                return $this->fetchContent($url, $method);
        }
    }

    /**
     * Stores the content fetcher factory on this handler.
     *
     * @param mixed $contentFetcherFactory
     */
    public function setContentFetcher($contentFetcherFactory)
    {
        $this->contentFetcherFactory = $contentFetcherFactory;
    }

    /**
     * Sets the caching headers (overwriting anything the remote host set).
     * Without arguments this tells the browser not to cache the response
     * (private, max-age=0 and an Expires date in the past).
     *
     * @param string|false $etag value for the ETag header, false to omit it
     * @param int|false $maxAge cache lifetime in seconds, false for "do not cache"
     * @param string|false $lastModified value for the Last-Modified header, false to omit it
     */
    private function setCachingHeaders($etag = false, $maxAge = false, $lastModified = false)
    {
        if ($etag) {
            header("ETag: $etag");
        }
        if ($lastModified) {
            header("Last-Modified: $lastModified");
        }
        $expires = $maxAge !== false ? time() + $maxAge : time() - 3000;
        $public = $maxAge ? 'public' : 'private';
        $maxAge = $maxAge === false ? '0' : $maxAge;
        // Cache-Control directives are a comma separated list
        header("Cache-Control: {$public}, max-age={$maxAge}", true);
        header("Expires: " . gmdate("D, d M Y H:i:s", $expires) . " GMT", true);
    }

    /**
     * Minimal url check: only rejects urls that parse_url() cannot parse.
     * TODO: validate that the url is in correct http(s)://location:port/url format.
     *
     * @param string $url
     * @return string the 'validated' url
     * @throws Exception when the url cannot be parsed
     */
    private function validateUrl($url)
    {
        if (! parse_url($url)) {
            throw new Exception("Invalid Url");
        }
        return $url;
    }

    /**
     * Returns the headers of the incoming request as a name => value array.
     *
     * @return array
     */
    private function request_headers()
    {
        // Try to use apache's request headers if available
        if (function_exists("apache_request_headers")) {
            if (($headers = apache_request_headers())) {
                return $headers;
            }
        }
        // if that failed, try to create them from the _SERVER superglobal
        $headers = array();
        foreach (array_keys($_SERVER) as $skey) {
            if (substr($skey, 0, 5) == "HTTP_") {
                // HTTP_ACCEPT_LANGUAGE => Accept-Language (strip the prefix, not everything after it)
                $headername = str_replace(" ", "-", ucwords(strtolower(str_replace("_", " ", substr($skey, 5)))));
                $headers[$headername] = $_SERVER[$skey];
            }
        }
        return $headers;
    }
}