Regular Expressions for URI Validation/Parsing





0
Date Submitted Wed. Mar. 3rd, 2010 11:05 AM
Revision 1 of 1
Beginner wizard04
Tags Email | mailto | Parse | regex | URI | URL | validation
Comments 0 comments
(Supported by JavaScript, maybe other languages)
//replace() can be used to parse the URI. For example, to get the path:
//  path = uri.replace(regexUri, "$5$6");

//****************************************************//
//***************** Validate a URI *******************//
//****************************************************//
//- The different parts are kept in their own groups and can be recombined
//  depending on the scheme:
//  - http as $1://$3:$4$5?$7#$8
//  - ftp as $1://$2@$3:$4$5
//  - mailto as $1:$6?$7
//- groups are as follows:
//  1   == scheme
//  2   == userinfo
//  3   == host
//  4   == port
//  5,6 == path (5 if it has an authority, 6 if it doesn't)
//  7   == query
//  8   == fragment

//Matches a complete URI (anchored with ^ and $, case-insensitive via the "i"
//flag) and captures its components:
//  1 scheme, 2 userinfo, 3 host, 4 port, 5 path (with authority),
//  6 path (without authority), 7 query, 8 fragment.
//Groups that do not take part in a match are undefined in exec()/match()
//results and expand to an empty string in replace().
//Every "/" outside a character class is escaped as "\/"; an unescaped slash
//would end the regex literal early and make the statement a syntax error.
var regexUri = /^([a-z0-9+.-]+):(?:\/\/(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(?::(\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;
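/*composed as follows (laid out in free-spacing form for readability only; the trailing "#..." text on each line is an annotation, not part of the pattern):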
        ^
        ([a-z0-9+.-]+):       #scheme
        (?:
                //                                          #it has an authority:
                (?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?      #userinfo
                ((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)          #host
                (?::(\d*))?                              #port
                (/(?:[a-z0-9-._~!$&'()*+,;=:@/]|%[0-9A-F]{2})*)?        #path
                |
                                                                        #it doesn't have an authority:
                (/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@/]|%[0-9A-F]{2})*)?    #path
        )
        (?:
                \?((?:[a-z0-9-._~!$&'()*+,;=:/?@]|%[0-9A-F]{2})*)       #query string
        )?
        (?:
                #((?:[a-z0-9-._~!$&'()*+,;=:/?@]|%[0-9A-F]{2})*)        #fragment
        )?
        $
*/


//****************************************************//
//** Validate a URI (includes delimiters in groups) **//
//****************************************************//
//- The different parts--along with their delimiters--are kept in their own
//  groups and can be recombined as $1$6$2$3$4$5$7$8$9
//- groups are as follows:
//  1,6 == scheme:// or scheme:
//  2   == userinfo@
//  3   == host
//  4   == :port
//  5,7 == path (5 if it has an authority, 7 if it doesn't)
//  8   == ?query
//  9   == #fragment

//The pattern is anchored with ^ and $, so the whole string must match, and the
//"i" flag makes it case-insensitive.
//The top-level alternation tries the "scheme://" form first (groups 1-5) and
//falls back to the plain "scheme:" form (groups 6-7). Groups from the branch
//that did not match are undefined and expand to an empty string in replace(),
//which is why both sets of groups can be listed in one replacement string.
var regexUriDelim = /^(?:([a-z0-9+.-]+:\/\/)((?:(?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(:(?:\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|([a-z0-9+.-]+:)(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(\?(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?(#(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?$/i;

//****************************************************//
//***************** Validate a URL *******************//
//****************************************************//
//Validates a URI with an http or https scheme.
//- The different parts are kept in their own groups and can be recombined as
//  $1://$2:$3$4?$5#$6
//- Does not validate the host portion (domain) beyond checking that it
//  consists of valid characters; IPv6 and IPvFuture address literals are
//  not accepted.

//Matches a complete http/https URL (anchored with ^ and $, case-insensitive
//via the "i" flag) and captures:
//  1 scheme, 2 host (at least 3 characters), 3 port, 4 path, 5 query,
//  6 fragment.
//The literal must stay on a single line: a regex literal cannot contain a line
//break, so wrapping it (even inside a character class) is a syntax error.
var regexUrl = /^(https?):\/\/((?:[a-z0-9.-]|%[0-9A-F]{2}){3,})(?::(\d+))?((?:\/(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})*)*)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;

//****************************************************//
//**************** Validate a Mailto *****************//
//****************************************************//
//Validates a URI with a mailto scheme.
//- The different parts are kept in their own groups and can be recombined as
//  $1:$2?$3
//- Does not validate the email addresses themselves.

//The pattern is anchored with ^ and $, so the whole string must match, and the
//"i" flag makes it case-insensitive (so "MAILTO:" is accepted as well).
//Groups: 1 == scheme, 2 == address part, 3 == query (without the "?").
//Both the address part and the "?query" part are optional, so a bare "mailto:"
//also matches.
var regexMailto = /^(mailto):((?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+)?(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;

Andy Harrison

Comments

There are currently no comments for this snippet.

Voting

Votes Up


Votes Down