Replacing the non-whitespace character with a non-word boundary fixed a bug that this process choked on when similarly-named tags were positioned close together:
/**
* ChronoFish May 2009
* www.chronofish.com
* xml2array will convert an XML document to an array.
* When attributes is true, resultant array will group by tag and exact-match attributes.
* When attributes is false, resultant array will group by tag and ignore attributes.
*
* The resultant array will be an associative array with tag (or tag + attributes) as the key. Each value
* will either be an XML2Array array (another associative array), or an array of values.
*
* Assumptions:
* 1. XML is well formed
* 2. White space is wrapped in CDATA
*/ function xml2array($originalXML, $attributes=true) { $xmlArray = array(); $search = $attributes ? '|<((\w+?)(.*))\s*>(.*)</\2>|Ums' : '|<((\w+?)()).*>(.*)</\2>|Ums';
// normalize data $xml = preg_replace('|>\s*<|', ">\n<", $originalXML); // one tag per line $xml = preg_replace('|<\?.*\?>|', '', $xml); // remove XML declarations $xml = preg_replace('|<(\w+?)(.*)/>|U', '<$1$2></$1>', $xml); //Expand singletons
if(! preg_match_all($search, $xml, $xmlMatches)) returntrim($originalXML); // bail out - no XML found
$search = $attributes ? '|(.*)|Ums' : '|(.*)|Ums';
See the revised solution below:
/**
 * ChronoFish May 2009
 * www.chronofish.com
 *
 * xml2array converts an XML document into a nested array using regular expressions.
 *
 * When $attributes is true, elements are grouped by tag name plus the exact attribute text
 * (for example the key 'item id="1"'). When $attributes is false, elements are grouped by tag
 * name only and attributes are ignored.
 *
 * Each key maps to a list with one entry per matching element, in document order. An entry is
 * either another xml2array() result (the element contains child elements) or the element's
 * trimmed text (it contains none; an empty element yields '').
 *
 * Assumptions and limitations:
 * 1. XML is well formed.
 * 2. Leaf text is trimmed, so significant white space must be wrapped in CDATA. CDATA markers
 *    and entities are not decoded.
 * 3. An element's content ends at the first matching closing tag, so an element nested inside
 *    another element of the same name is not supported.
 * 4. Text that sits next to child elements (mixed content) is discarded.
 *
 * @param string $originalXML XML document or fragment.
 * @param bool   $attributes  Whether the attribute text is part of the array keys.
 * @return array|string Nested array, or the trimmed input when it contains no element.
 */
function xml2array($originalXML, $attributes=true)
{
    // XML names may contain ':', '-' and '.' besides word characters (namespaced/hyphenated tags).
    $name = '[\w:.-]+';

    // One unit of attribute text: a whole quoted value (which may legally contain '>' or '/'),
    // or any single character that can neither open nor close a tag. Because '<' and '>' are
    // excluded outside quotes, attribute text can never run across a tag boundary, while new
    // lines between attributes are accepted.
    $attrUnit = '(?:[^<>"\']|"[^"]*"|\'[^\']*\')';

    // Group 1 is the array key, group 2 the tag name (back-referenced by the closing tag) and
    // group 4 the inner XML. The look-ahead pins the end of the name so the engine cannot
    // backtrack into a shorter, similarly named tag.
    $search = $attributes
        ? '~<((' . $name . ')(?=[\s>])(' . $attrUnit . '*?))\s*>(.*?)</\2\s*>~s'
        : '~<((' . $name . ')())(?=[\s>])' . $attrUnit . '*>(.*?)</\2\s*>~s';

    // Remove XML declarations / processing instructions (lazy, so two of them are never merged).
    $xml = preg_replace('~<\?.*?\?>~s', '', $originalXML);

    // Expand self-closing tags (<a x="1"/> becomes <a x="1"></a>) so one pattern handles both forms.
    $xml = preg_replace('~<(' . $name . ')(?=[\s/])(' . $attrUnit . '*?)\s*/>~', '<$1$2></$1>', $xml);

    // The patterns above do not depend on line structure, so the input needs no line-splitting
    // pass, which would rewrite white space inside CDATA or text.
    if (! preg_match_all($search, $xml, $xmlMatches))
        return trim($originalXML); // bail out - no XML found, this is a text leaf

    $xmlArray = array();
    foreach ($xmlMatches[1] as $index => $key)
    {
        // Elements sharing a key are collected in document order; children are converted recursively.
        $xmlArray[$key][] = xml2array($xmlMatches[4][$index], $attributes);
    }
    return $xmlArray;
}