/**
 * Returns the contents of a web page
 *
 * Opens a remote web document over a plain TCP socket, sends an HTTP/1.0 GET
 * request and returns the response body. The raw response headers (status
 * line included) are stored in the $headers variable.
 *
 * This function follows 301 and 302 redirects (absolute or relative
 * Location headers) up to $maxRedirects times. It returns false if any
 * status code other than 200, 301 or 302 is returned by the web server,
 * if the connection fails or stalls, or if the redirect limit is reached.
 *
 * The URL scheme is not interpreted: the request is always sent unencrypted,
 * so a redirect to an https:// address will not be fetched over TLS.
 *
 * @param string $url The URL to the web page
 * @param string $headers The HTTP headers will be stored in this variable
 * @param int $port Optional port number to use for grabbing the document;
 *                  an explicit port inside $url takes precedence
 * @param int $timeout The number of seconds to wait for the connection
 *                     before giving up
 * @param int $maxRedirects Maximum number of redirects to follow
 * @return mixed The document body as a string, or false on failure
 */
function get_http_document($url, &$headers = null, $port = 80, $timeout = 8, $maxRedirects = 5)
{
    $pURL = parse_url($url);
    // parse_url() returns false for seriously malformed URLs.
    if ($pURL === false || empty($pURL['host'])) {
        return false;
    }

    $host = $pURL['host'];
    $hostHeader = $host;
    $connectPort = $port;
    // A port written in the URL itself wins over the $port argument, and
    // must then also be announced in the Host header when it is not 80.
    if (isset($pURL['port'])) {
        $connectPort = $pURL['port'];
        if ((int) $connectPort !== 80) {
            $hostHeader .= ':' . $connectPort;
        }
    }

    $remotePath = isset($pURL['path']) ? $pURL['path'] : '/';
    $remoteDocument = $remotePath;
    // Compare against '' rather than using empty(), so "?0" is preserved.
    if (isset($pURL['query']) && $pURL['query'] !== '') {
        $remoteDocument .= '?' . $pURL['query'];
    }

    $fp = fsockopen($host, $connectPort, $errno, $errstr, $timeout);
    if (!$fp) {
        return false;
    }

    $request = "GET $remoteDocument HTTP/1.0\r\n";
    $request .= "Host: $hostHeader\r\n";
    $request .= "Connection: Close\r\n\r\n";
    if (fwrite($fp, $request) === false) {
        fclose($fp);
        return false;
    }

    $received = '';
    $timedOut = false;
    while (!feof($fp)) {
        $chunk = fread($fp, 8192);
        if ($chunk === false) {
            break;
        }
        $received .= $chunk;
        // feof() never becomes true on a stalled connection, so the
        // timed_out flag is the only way out of the loop in that case.
        $meta = stream_get_meta_data($fp);
        if (!empty($meta['timed_out'])) {
            $timedOut = true;
            break;
        }
    }
    fclose($fp);
    if ($timedOut) {
        return false;
    }

    // Separate the headers from the content; the body may be absent.
    $parts = explode("\r\n\r\n", $received, 2);
    $headers = $parts[0];
    $content = isset($parts[1]) ? $parts[1] : '';

    $headerParts = explode("\r\n", $headers);
    if (!preg_match('~HTTP/1\.\d ([\d]+)~i', $headerParts[0], $matches)) {
        return false;
    }
    $statusCode = (int) $matches[1];
    if ($statusCode === 200) {
        return $content;
    }
    if ($statusCode !== 301 && $statusCode !== 302) {
        return false;
    }

    // Refuse to follow any more redirects once the budget is used up,
    // otherwise a redirect loop would recurse without end.
    if ($maxRedirects <= 0) {
        return false;
    }
    if (!preg_match('~^Location:(.*)$~im', $headers, $matches)) {
        return false;
    }
    $newLocation = trim($matches[1]);
    if ($newLocation === '') {
        return false;
    }

    $target = parse_url($newLocation);
    if ($target === false) {
        return false;
    }
    if (empty($target['host'])) {
        // A scheme without a host (e.g. "mailto:") cannot be fetched here.
        if (isset($target['scheme'])) {
            return false;
        }
        // Relative reference: rebuild an absolute URL on the current host.
        // Dot segments ("../") are left for the server to resolve.
        $base = (isset($pURL['scheme']) ? $pURL['scheme'] : 'http') . '://' . $host;
        if (isset($pURL['port'])) {
            $base .= ':' . $pURL['port'];
        }
        if ($newLocation[0] === '/') {
            $newLocation = $base . $newLocation;
        } elseif ($newLocation[0] === '?') {
            $newLocation = $base . $remotePath . $newLocation;
        } else {
            $slash = strrpos($remotePath, '/');
            $directory = ($slash === false) ? '/' : substr($remotePath, 0, $slash + 1);
            $newLocation = $base . $directory . $newLocation;
        }
    }

    return get_http_document($newLocation, $headers, $port, $timeout, $maxRedirects - 1);
}