handle = $this->initCurlHandle($request->getUrl());
    $this->setHeaders($request);
    // Execute the request
    $content = @curl_exec($request->handle);
    $this->parseResult($request, $content);
    curl_close($request->handle);
    unset($request->handle);
    return $request;
  }

  /**
   * Performs multiple (array of RemoteContentRequest) requests through one curl multi handle
   * and fills in the responses in the $request objects.
   *
   * Every request gets its own curl handle (stored temporarily in $request->handle), the
   * transfers are driven until none is running anymore, and each result is then handed to
   * parseResult(). The temporary handle property is removed again before returning.
   *
   * @param Array of RemoteContentRequest's $requests
   * @return array $requests the same request objects, now carrying their responses
   */
  public function multiFetchRequest(Array $requests) {
    $mh = curl_multi_init();
    foreach ($requests as $request) {
      $request->handle = $this->initCurlHandle($request->getUrl());
      // Set this so the multihandler will return data
      curl_setopt($request->handle, CURLOPT_RETURNTRANSFER, 1);
      $this->setHeaders($request);
      curl_multi_add_handle($mh, $request->handle);
    }

    $running = 0;
    do {
      $status = curl_multi_exec($mh, $running);
      if ($status === CURLM_CALL_MULTI_PERFORM) {
        // Older libcurl versions ask to be called again right away
        continue;
      }
      if ($status !== CURLM_OK) {
        // The multi stack reported an error; stop instead of looping forever
        break;
      }
      // Sleep until there is activity on one of the connections instead of spinning the CPU.
      // curl_multi_select() can return -1 immediately when it has nothing to wait on, so back
      // off briefly in that case.
      if ($running > 0 && curl_multi_select($mh) === -1) {
        usleep(1000);
      }
    } while ($running > 0);

    // Drain the message queue of the multi handle. NOTE(review): the curl extension records
    // each transfer's result on its easy handle while these messages are read, which is what
    // lets the curl_errno() check in parseResult() see failed parallel transfers - confirm
    // against the PHP versions this has to support.
    while (curl_multi_info_read($mh) !== false) {
      // nothing to do, reading the message is the point
    }

    foreach ($requests as $request) {
      // Collect the response that was gathered for this handle
      $content = curl_multi_getcontent($request->handle);
      $this->parseResult($request, $content);
      curl_multi_remove_handle($mh, $request->handle);
      unset($request->handle);
    }
    curl_multi_close($mh);
    unset($mh);
    return $requests;
  }

  /**
   * Parses the result content into the headers and body, and retrieves the http code and content type
   *
   * Text'ish responses (content type containing text, html, json, xml or atom) are recoded to
   * UTF-8 when mbstring is available, using the charset declared in the response itself. The
   * outcome is stored on the request through its setHttpCode(), setHttpCodeMsg(),
   * setContentType(), setResponseHeaders(), setResponseContent() and setResponseSize() setters.
   * A missing http code is reported as '404', a curl error as '500' with the curl error
   * message as body.
   *
   * @param RemoteContentRequest $request request whose ->handle is the curl handle that was executed
   * @param string $content raw response (headers followed by body); false or null is treated as empty
   */
  protected function parseResult(RemoteContentRequest $request, $content) {
    // curl_exec() returns false on failure and curl_multi_getcontent() may return null,
    // the string functions below need an actual string
    if (! is_string($content)) {
      $content = '';
    }
    $headers = '';
    $body = '';
    $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
    $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);

    // Attempt to magically convert all text'ish responses to UTF8, especially the xml and json parsers get upset if invalid UTF8 is encountered
    $typeString = (string) $contentType;
    $isTextType = false;
    foreach (array('text', 'html', 'json', 'xml', 'atom') as $textType) {
      if (strpos($typeString, $textType) !== false) {
        $isTextType = true;
        break;
      }
    }
    // Determined separately from the loop above: types such as text/xml or application/xhtml+xml
    // already match 'text' / 'html' first and would otherwise never be recognized as xml
    $isXml = strpos($typeString, 'xml') !== false;

    if ($isTextType && function_exists('mb_convert_encoding')) {
      $charset = $this->detectDeclaredCharset($content);
      // the xml and json parsers get very upset if there are invalid UTF8 sequences in the string, by recoding it any bad chars will be filtered out
      $converted = $this->recodeToUtf8($content, $charset);
      if ($converted === null && strtoupper($charset) !== 'UTF-8') {
        // The remote side declared a charset mbstring does not know; treat the content as UTF-8
        // so that invalid sequences still get filtered out
        $charset = 'UTF-8';
        $converted = $this->recodeToUtf8($content, $charset);
      }
      if ($converted !== null) {
        $content = $converted;
      }
      // if original charset is not utf-8 we now try to rewrite any xml declarations
      if ($isXml && strtoupper($charset) !== 'UTF-8') {
        // The charset comes from the remote response, so it is quoted before it goes into the pattern
        $pattern = '/encoding=\s*([\'"])' . preg_quote($charset, '/') . '\s*\1/i';
        $rewritten = preg_replace($pattern, 'encoding="UTF-8"', $content);
        if ($rewritten !== null) {
          $content = $rewritten;
        }
      }
    }

    // on redirects and such we get multiple headers back from curl it seems, we really only want the last one
    // NOTE(review): a body that itself starts with "HTTP" and contains an empty line is
    // indistinguishable from another header block here and would be consumed as headers.
    while (strncmp($content, 'HTTP', 4) === 0 && ($split = strpos($content, "\r\n\r\n")) !== false) {
      $headers = substr($content, 0, $split);
      // substr() returns false instead of '' for an empty remainder on older PHP versions
      $content = $body = (string) substr($content, $split + 4);
    }

    $parsedHeaders = array();
    foreach (explode("\n", $headers) as $header) {
      $pair = $this->parseHeaderLine($header);
      if ($pair !== null) {
        // a header that occurs more than once keeps its last value
        $parsedHeaders[$pair[0]] = $pair[1];
      }
    }

    if (! $httpCode) {
      $httpCode = '404';
    }
    if (curl_errno($request->handle)) {
      $httpCode = '500';
      $body = 'Curl error: ' . curl_error($request->handle);
    }
    $request->setHttpCode($httpCode);
    $request->setHttpCodeMsg($this->resolveHttpCode($httpCode));
    $request->setContentType($contentType);
    $request->setResponseHeaders($parsedHeaders);
    $request->setResponseContent($body);
    $request->setResponseSize(strlen($content));
  }

  /**
   * Looks for a charset declaration in the raw response (http header, html meta tag or
   * xml declaration) and returns it, or 'UTF-8' when nothing usable is declared.
   *
   * @param string $content raw response, headers included
   * @return string declared charset name, not validated against the known encodings
   */
  private function detectDeclaredCharset($content) {
    $matchedCharset = array();
    $found = preg_match("/charset\s*=\s*([^\"' >]*)/ix", $content, $matchedCharset) // http header or html meta tags
        || preg_match("/encoding\s*=\s*[\'\"]([^\"' >]*)/ix", $content, $matchedCharset); // xml declaration
    if (! $found) {
      return 'UTF-8';
    }
    $charset = trim($matchedCharset[1]);
    // The capture only stops at quotes, spaces and '>', so cut it at the end of the header
    // line or at the next header parameter
    $charset = trim(substr($charset, 0, strcspn($charset, ";\r\n")));
    return $charset !== '' ? $charset : 'UTF-8';
  }

  /**
   * Recodes $content from $charset to UTF-8.
   *
   * An encoding name unknown to mbstring makes mb_convert_encoding() raise a warning and
   * return false on PHP 7 and throw a ValueError on PHP 8; both are reported as null here so
   * a bogus charset sent by a remote server can not wipe or abort the response.
   *
   * @param string $content
   * @param string $charset source encoding name
   * @return string|null the recoded content, or null if the conversion was not possible
   */
  private function recodeToUtf8($content, $charset) {
    try {
      $converted = @mb_convert_encoding($content, 'UTF-8', $charset);
    } catch (\ValueError $e) {
      $converted = false;
    }
    return is_string($converted) ? $converted : null;
  }

  /**
   * Splits a "Name: value" header line and normalizes the name to Word-Word capitalization
   * (e.g. "content-type" becomes "Content-Type").
   *
   * @param string $line a single header line
   * @return array|null array(name, value), or null if the line has no colon or starts with one
   */
  private function parseHeaderLine($line) {
    $colon = strpos($line, ':');
    if (! $colon) {
      return null;
    }
    $name = trim(substr($line, 0, $colon));
    $name = str_replace(' ', '-', ucwords(str_replace('-', ' ', $name)));
    $value = trim(substr($line, $colon + 1));
    return array($name, $value);
  }

  /**
   * Misc function to resolve http status codes to a matching http code message
   * since curl strips those, but we do need'm in the proxy handler
   *
   * @param $httpCode status code, as integer or numeric string
   * @return string the reason phrase, or "Unknown Error" for codes that are not in the table
   */
  private function resolveHttpCode($httpCode) {
    static $messages = array(
      100 => "Continue",
      101 => "Switching Protocols",
      200 => "OK",
      201 => "Created",
      202 => "Accepted",
      203 => "Non-Authoritative Information",
      204 => "No Content",
      205 => "Reset Content",
      206 => "Partial Content",
      300 => "Multiple Choices",
      301 => "Moved Permanently",
      302 => "Found",
      303 => "See Other",
      304 => "Not Modified",
      305 => "Use Proxy",
      306 => "(Unused)",
      307 => "Temporary Redirect",
      308 => "Permanent Redirect",
      400 => "Bad Request",
      401 => "Unauthorized",
      402 => "Payment Required",
      403 => "Forbidden",
      404 => "Not Found",
      405 => "Method Not Allowed",
      406 => "Not Acceptable",
      407 => "Proxy Authentication Required",
      408 => "Request Timeout",
      409 => "Conflict",
      410 => "Gone",
      411 => "Length Required",
      412 => "Precondition Failed",
      413 => "Request Entity Too Large",
      414 => "Request-URI Too Long",
      415 => "Unsupported Media Type",
      416 => "Requested Range Not Satisfiable",
      417 => "Expectation Failed",
      422 => "Unprocessable Entity",
      426 => "Upgrade Required",
      428 => "Precondition Required",
      429 => "Too Many Requests",
      431 => "Request Header Fields Too Large",
      451 => "Unavailable For Legal Reasons",
      500 => "Internal Server Error",
      501 => "Not Implemented",
      502 => "Bad Gateway",
      503 => "Service Unavailable",
      504 => "Gateway Timeout",
      505 => "HTTP Version Not Supported",
      511 => "Network Authentication Required"
    );
    $code = (int) $httpCode;
    return isset($messages[$code]) ? $messages[$code] : "Unknown Error";
  }

  /**
   * Sets the headers and post body for the request if they are specified
   *
   * Request headers whose normalized name is listed in $this->disallowedHeaders are dropped.
   * The fetcher's own User-Agent header is always sent, also for requests that do not carry
   * any headers of their own.
   *
   * @param RemoteContentRequest $request request whose ->handle is the curl handle to configure
   */
  private function setHeaders(RemoteContentRequest $request) {
    $outHeaders = array();
    if ($request->hasHeaders()) {
      foreach (explode("\n", $request->getHeaders()) as $header) {
        $pair = $this->parseHeaderLine($header);
        if ($pair !== null && ! in_array($pair[0], $this->disallowedHeaders)) {
          $outHeaders[] = $pair[0] . ': ' . $pair[1];
        }
      }
    }
    $outHeaders[] = "User-Agent: " . BasicRemoteContentFetcher::USER_AGENT;
    curl_setopt($request->handle, CURLOPT_HTTPHEADER, $outHeaders);

    $method = $request->getMethod();
    if ($request->isPost()) {
      curl_setopt($request->handle, CURLOPT_POST, 1);
      curl_setopt($request->handle, CURLOPT_POSTFIELDS, $request->getPostBody());
    } else if ($method == 'DELETE' || $method == 'HEAD' || $method == 'PUT') {
      curl_setopt($request->handle, CURLOPT_CUSTOMREQUEST, $method);
      if ($method == "PUT") {
        // PUT carries the same body a POST would
        curl_setopt($request->handle, CURLOPT_POSTFIELDS, $request->getPostBody());
      }
    }
  }

  /**
   * Initializes a curl handle for making a request
   * This will set the timeouts based on the 'curl_connection_timeout' and
   * 'curl_request_timeout' configuration, and set a proxy server to use if the
   * 'proxy' config string is not empty
   *
   * The handle returns the transfer as a string with the response headers included, which is
   * the format parseResult() expects.
   *
   * @param string $url
   * @return curl handle
   */
  private function initCurlHandle($url) {
    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $url);

    // CURLOPT_FOLLOWLOCATION doesn't work with PHP safemode and openbasedir turned on
    try {
      $isRestricted = @ini_get('open_basedir') || @ini_get('safe_mode');
    } catch (\Exception $e) {
      $isRestricted = false;
    }
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, $isRestricted ? 0 : 1);

    // CURLOPT_BINARYTRANSFER is no longer set: per the PHP manual it has no effect, the raw
    // output is always returned when CURLOPT_RETURNTRANSFER is used
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_AUTOREFERER, 1);
    curl_setopt($handle, CURLOPT_MAXREDIRS, 10);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, Config::get('curl_connection_timeout'));
    curl_setopt($handle, CURLOPT_TIMEOUT, Config::get('curl_request_timeout'));
    // Include the response headers in the output, parseResult() splits them off again
    curl_setopt($handle, CURLOPT_HEADER, 1);
    // NOTE(review): this turns off TLS certificate verification, so https responses can be
    // spoofed by anyone on the network path. Left unchanged because enabling it requires a
    // CA bundle on the host - consider making it configurable.
    curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, 0);

    $proxy = Config::get('proxy');
    if (! empty($proxy)) {
      curl_setopt($handle, CURLOPT_PROXY, $proxy);
    }
    return $handle;
  }
}