/**
 * Returns the contents of a web page.
 *
 * Opens a remote web document over a raw socket, sends an HTTP/1.0 GET
 * request and returns the response body. The raw response headers of the
 * last response received are stored in the $headers variable.
 *
 * Redirects (301, 302, 303, 307 and 308) are followed, up to $maxRedirects
 * hops; relative Location values are resolved against the current URL.
 * Any other status code than those or a 200 makes the function return false.
 *
 * Port selection: a port written in the URL always wins. Otherwise $port is
 * used, except that an https:// URL combined with the default $port of 80
 * is fetched on port 443 (over the ssl:// transport, which requires the
 * OpenSSL extension).
 *
 * @param string $url          The URL to the web page (http or https)
 * @param string $headers      The HTTP headers will be stored in this variable
 * @param int    $port         Optional port number to use for grabbing the document
 * @param int    $timeout      The number of seconds to try before giving up
 *                             (applies to connecting and to reading)
 * @param int    $maxRedirects Maximum number of redirects to follow
 * @return mixed The document body as a string, or false on any failure
 */
function get_http_document($url, &$headers = null, $port = 80, $timeout = 8, $maxRedirects = 5)
{
    // Guard against redirect loops: every hop decrements the budget.
    if ($maxRedirects < 0) {
        return false;
    }

    $pURL = parse_url($url);
    if (empty($pURL['host'])) {
        return false;
    }

    // Only HTTP(S) can be spoken over this socket; a missing scheme
    // (e.g. "//host/path") is treated as plain http.
    $scheme = isset($pURL['scheme']) ? strtolower($pURL['scheme']) : 'http';
    if ($scheme !== 'http' && $scheme !== 'https') {
        return false;
    }
    $defaultPort = ($scheme === 'https') ? 443 : 80;

    if (isset($pURL['port'])) {
        $targetPort = (int) $pURL['port'];
    } elseif ($scheme === 'https' && (int) $port === 80) {
        // Caller left $port at its default: use the https default instead.
        $targetPort = 443;
    } else {
        $targetPort = (int) $port;
    }

    $remotePath = empty($pURL['path']) ? '/' : $pURL['path'];
    $remoteDocument = empty($pURL['query']) ? $remotePath : $remotePath . '?' . $pURL['query'];
    // A literal space would terminate the request target early.
    $remoteDocument = str_replace(' ', '%20', $remoteDocument);

    $transportHost = ($scheme === 'https') ? 'ssl://' . $pURL['host'] : $pURL['host'];
    $fp = fsockopen($transportHost, $targetPort, $errno, $errstr, $timeout);
    if (!$fp) {
        return false;
    }

    // fsockopen's timeout only covers connecting; bound the reads as well.
    stream_set_timeout($fp, max(1, (int) ceil($timeout)));

    // The Host header must carry the port when it is not the scheme default.
    $hostHeader = $pURL['host'];
    if ($targetPort !== $defaultPort) {
        $hostHeader .= ':' . $targetPort;
    }

    $request = "GET $remoteDocument HTTP/1.0\r\n";
    $request .= "Host: $hostHeader\r\n";
    $request .= "Connection: Close\r\n\r\n";

    if (fwrite($fp, $request) === false) {
        fclose($fp);
        return false;
    }

    $received = stream_get_contents($fp);
    $meta = stream_get_meta_data($fp);
    fclose($fp);

    // A read timeout means the document is truncated; do not report success.
    if ($received === false || !empty($meta['timed_out'])) {
        return false;
    }

    // Separate the headers from the content
    $parts = explode("\r\n\r\n", $received, 2);
    $headers = $parts[0];
    $content = isset($parts[1]) ? $parts[1] : '';

    $headerParts = explode("\r\n", $headers);
    if (!preg_match('~^HTTP/1\.\d ([\d]+)~i', $headerParts[0], $matches)) {
        return false;
    }

    $statusCode = (int) $matches[1];
    if ($statusCode === 200) {
        return $content;
    }
    if (
        $statusCode !== 301 && $statusCode !== 302 && $statusCode !== 303
        && $statusCode !== 307 && $statusCode !== 308
    ) {
        return false;
    }

    if (!preg_match('~^Location:(.*)$~im', $headers, $matches)) {
        return false;
    }

    $newLocation = trim($matches[1]);
    if ($newLocation === '') {
        return false;
    }

    // Resolve relative redirect targets against the URL just requested.
    if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $newLocation)) {
        $authority = $pURL['host'] . (isset($pURL['port']) ? ':' . $pURL['port'] : '');
        if (substr($newLocation, 0, 2) === '//') {
            // Scheme-relative: keep the current scheme.
            $newLocation = $scheme . ':' . $newLocation;
        } elseif ($newLocation[0] === '/') {
            // Host-relative absolute path.
            $newLocation = $scheme . '://' . $authority . $newLocation;
        } elseif ($newLocation[0] === '?') {
            // Query-only: same path, new query string.
            $newLocation = $scheme . '://' . $authority . $remotePath . $newLocation;
        } else {
            // Path-relative: resolve against the directory of the current path.
            $baseDir = substr($remotePath, 0, strrpos($remotePath, '/') + 1);
            $newLocation = $scheme . '://' . $authority . $baseDir . $newLocation;
        }
    }

    return get_http_document($newLocation, $headers, $port, $timeout, $maxRedirects - 1);
}