|
|
|
Zip Codes
23
If you've ever wanted to build an application that can both find the distance between two zip codes, and find zip codes within a specific radius of another zip code, then this is the snippet for you.
I've tried to build this before using PHP4 and MySQL 4 and have found it to be painfully slow for a one-user connection over a Local Area Network (LAN). So, before you continue reading, I must make it abundantly clear that this method requires PHP 5 and MySQL 5.
This is because in PHP5, we will need the MySQL-Improved interfaces, and in MySQL 5, we will need functions and procedures.
Using functions and procedures in MySQL puts the weight of the calculations on MySQL, where they will run hundreds of times faster than if PHP were to attempt to do the work. In addition, using functions and procedures is still hundreds of times faster than carefully crafted SQL statements (one benchmark took over 10 seconds using a WHERE clause, while the function + procedure method took around .12 seconds--note the dot).
Since I am not a mathematician by trade, I found the necessary calculations using Google.
For the "Great Circle Distance" formula, I used Meridian Word Data's website. For the radius finder, I used another that unfortunately I do not have off the top of my head right now. I will look for it again and repost it for proper accreditation.
For the zip code database, I am using the free zip code database from CFDynamics.
Below, you will find the database and table schema, the functions and procedures, and also a demo PHP script to see all of this put into action. As a courtesy, you may download a dump of this database from my website. It's uncompressed size is 3.4M, and it's compressed size is 537K. The checksum is available here.
Please leave me comments, suggestions, questions, and/or rants about this snippet. I feel this is a very useful bit of code, and hope that others out there may find as much use from it as I will be.
I've tried to build this before using PHP4 and MySQL 4 and have found it to be painfully slow for a one-user connection over a Local Area Network (LAN). So, before you continue reading, I must make it abundantly clear that this method requires PHP 5 and MySQL 5.
This is because in PHP5, we will need the MySQL-Improved interfaces, and in MySQL 5, we will need functions and procedures.
Using functions and procedures in MySQL puts the weight of the calculations on MySQL, where they will run hundreds of times faster than if PHP were to attempt to do the work. In addition, using functions and procedures is still hundreds of times faster than carefully crafted SQL statements (one benchmark took over 10 seconds using a WHERE clause, while the function + procedure method took around .12 seconds--note the dot).
Since I am not a mathematician by trade, I found the necessary calculations using Google.
For the "Great Circle Distance" formula, I used Meridian Word Data's website. For the radius finder, I used another that unfortunately I do not have off the top of my head right now. I will look for it again and repost it for proper accreditation.
For the zip code database, I am using the free zip code database from CFDynamics.
Below, you will find the database and table schema, the functions and procedures, and also a demo PHP script to see all of this put into action. As a courtesy, you may download a dump of this database from my website. It's uncompressed size is 3.4M, and it's compressed size is 537K. The checksum is available here.
Please leave me comments, suggestions, questions, and/or rants about this snippet. I feel this is a very useful bit of code, and hope that others out there may find as much use from it as I will be.
DELIMITER $
DROP FUNCTION IF EXISTS zip_deg2rad$
DROP FUNCTION IF EXISTS zip_gcd$
DROP FUNCTION IF EXISTS zip_distance$
DROP PROCEDURE IF EXISTS zip_radius$
CREATE FUNCTION zip_deg2rad(DEGREES DOUBLE) RETURNS DOUBLE
BEGIN
RETURN DEGREES / (180 / PI()+0.000000000000000);
END$
CREATE FUNCTION zip_gcd(type ENUM('M', 'N', 'K'), src_lat DOUBLE, src_long DOUBLE, dst_lat DOUBLE, dst_long DOUBLE) RETURNS DOUBLE
BEGIN
DECLARE temp DOUBLE;
DECLARE STATUTE_MILES DECIMAL(5,1);
DECLARE NAUTICAL_MILES DECIMAL(9,5);
DECLARE KILOMETERS DECIMAL(5,1);
SET STATUTE_MILES = 3963.0;
SET NAUTICAL_MILES = 3437.74677;
SET KILOMETERS = 6378.7;
SET src_lat = zip_deg2rad(src_lat);
SET src_long = zip_deg2rad(src_long);
SET dst_lat = zip_deg2rad(dst_lat);
SET dst_long = zip_deg2rad(dst_long);
SET temp = ACOS(SIN(src_lat) * SIN(dst_lat) + COS(src_lat) * COS(dst_lat) * COS(dst_long - src_long));
IF type = "M" THEN
SET temp = STATUTE_MILES * temp;
END IF;
IF type = "N" THEN
SET temp = NAUTICAL_MILES * temp;
END IF;
IF type = "K" THEN
SET temp = KILOMETERS * temp;
END IF;
RETURN temp;
END$
CREATE FUNCTION zip_distance(type ENUM('M', 'N', 'K'), zip_start VARCHAR(5), zip_finish VARCHAR(5)) RETURNS DOUBLE
BEGIN
DECLARE distance DOUBLE;
DECLARE start_lat DOUBLE;
DECLARE start_long DOUBLE;
DECLARE finish_lat DOUBLE;
DECLARE finish_long DOUBLE;
SELECT latitude, longitude INTO start_lat, start_long FROM zipcodes WHERE zip = zip_start;
SELECT latitude, longitude INTO finish_lat, finish_long FROM zipcodes WHERE zip = zip_finish;
SELECT zip_gcd(type, start_lat, start_long, finish_lat, finish_long) INTO distance;
RETURN distance;
END$
CREATE PROCEDURE zip_radius(IN type ENUM('M', 'N', 'K'), IN zip_start VARCHAR(5), IN radius INT, prec INT)
BEGIN
DECLARE src_lat DOUBLE;
DECLARE src_long DOUBLE;
SELECT latitude, longitude INTO src_lat, src_long FROM zipcodes WHERE zip = zip_start;
SELECT zip, city, state, county, ROUND(zip_gcd(type, src_lat, src_long, latitude, longitude), prec) AS `distance`
FROM zipcodes
WHERE zip != zip_start
AND (POW((69.1 * (longitude - src_long) * COS(src_lat / 57.3)), 2) + POW((69.1 * (latitude - src_lat)), 2)) <= (radius * radius)
ORDER BY `distance` ASC;
END$
DELIMITER ;
<?php
$mysqli = mysqli_connect('localhost', 'dbuser', 'dbpass', 'zipcodes');
if (mysqli_connect_errno()) {
printf("Connection failed: %s\n", mysqli_connect_error());
exit;
}
$sql = "CALL zip_radius('M', 28273, 5, 0)";
if (!$mysqli->real_query($sql)) {
printf("Invalid query: %s\nWhole query: %s\n", $mysqli->error, $sql);
exit;
}
if ($result = $mysqli->store_result()) {
while ($record = $result->fetch_assoc()) {
$records[] = $record;
}
$result->close();
}
$mysqli->close();
print_r($records);
?>
Array
(
[0] => Array
(
[zip] => 28217
[city] => CHARLOTTE
[state] => NC
[county] => MECKLENBURG
[distance] => 2
)
[1] => Array
(
[zip] => 28134
[city] => PINEVILLE
[state] => NC
[county] => MECKLENBURG
[distance] => 4
)
[2] => Array
(
[zip] => 29716
[city] => FORT MILL
[state] => SC
[county] => YORK
[distance] => 5
)
[3] => Array
(
[zip] => 28210
[city] => CHARLOTTE
[state] => NC
[county] => MECKLENBURG
[distance] => 5
)
)




Not a programmer - a pretty fair engineer-mathematician 30 years ago but my comments for what they are worth:
* I think using zip_gcd may be overkill in zip_radius given that it uses a more approximate formula to filter. You know better than I whether it will make a perceptible difference in performance.
* I believe that the left side of the filter in zip_radius will always return statute miles while a user may be seduced into entering radius in the units specified by type
I've found alot of applications on the web where people are attempting a radius search, so the work you did is pertinent. I haven't found any that require accuracy greater than +-10%, so my approach will be to use the simplest approximation.
I ran an error check on the distance approximations using Census Bureau data from Autauga County, AL to all other counties in the 48 contiguous states + AK. Average error was 3.5% for the simplest, 2% for the more accurate approximation. Results were as bad as 30% off to points close to the north pole, but fine everywhere else.
I did clean up the database and replaced records where the 2000 Census defined a Zip Code Tabulation Area. Still a work in progress and, of course in Excel, but if I figure out how to make it available I will.
Thanks again.
I pared down your snippet to just a simple radius search in an 'unworldly' way and included it below. Application is joining with an address table to find out who lives within x miles. Any hint how to do that?
If you need further help, my username for gmail is dohpaz.
<?php
//THESE FILES ARE AVAILABLE FROM ZIPCODEDOWNLOAD.COM
include("DistanceAssistant.inc");
include("RadiusAssistant.inc");
//Simulate data entry
$ZIPCode = "90210";
$Miles = "23.74";
//Connect to database
$dbhost = "myhost"; //computer name or IP address
$dbuser = "myUserName";
$dbpass = "myPassword";
$db = "myDB_Name";
$dbConn = @mysql_connect($dbhost, $dbuser, $dbpass) or die("The site database is down. Please try logging on later.");
if ($db!="" and !@mysql_select_db($db))
die("The site database is unavailable. Please try logging on later.");
$sql = "SELECT Latitude,Longitude FROM ZIPCodes WHERE ZIPCode = '$ZIPCode'";
if (!$result = mysql_query($sql)) {
//Error in mySQL query
print "There was an error in sql statement, ".mysql_error()."<br><b>$sql</b>";
exit;
}
elseif ($result=="") {
//ZIP code does not exist - report to user...
echo "ZIP code does not exist";
}
else {
//ZIP code exists
$myrow = mysql_fetch_array($result);
$Latitude = $myrow["Latitude"];
$Longitude = $myrow["Longitude"];
$zcdRadius = new RadiusAssistant($Latitude,$Longitude,$Miles);
$minLat = $zcdRadius->MinLatitude();
$maxLat = $zcdRadius->MaxLatitude();
$minLong = $zcdRadius->MinLongitude();
$maxLong = $zcdRadius->MaxLongitude();
//mySQL Query
$sql = "SELECT * FROM ZIPCodes WHERE
Latitude >= $minLat
AND Latitude <= $maxLat
AND Longitude >= $minLong
AND Longitude <= $maxLong";
if (!$result = mysql_query($sql)) {
//Error in mySQL query
print "There was an error in sql statement, ".mysql_error()."<br><b>$sql</b>";
exit;
}
else {
$zcdDistance = new DistanceAssistant;
while ($myrow = mysql_fetch_array($result)){
$Distance = $zcdDistance->Calculate($Latitude,$Longitude,$myrow["Latitude"],$myrow["Longitude"]);
if ($Distance <= $Miles){
//this ZIP Code is within $Miles of $ZIPCode
//Display appropriate information to the user...
printf ("%s - %s<br>\n", $myrow["City"], $Distance);
}
}
}
}
mysql_close($dbConn);
?>