|
|
|
Regular Expressions for URI Validation/Parsing
0
wizard04
(Supported by JavaScript, maybe other languages)
//replace() can be used to parse the URI. For example, to get the path:
// path = uri.replace(regexUri, "$5$6");
//****************************************************//
//***************** Validate a URI *******************//
//****************************************************//
//Validates a generic URI and splits it into its components.
//- The different parts are kept in their own groups and can be recombined
//  depending on the scheme:
//  - http as $1://$3:$4$5?$7#$8
//  - ftp as $1://$2@$3:$4$5
//  - mailto as $1:$6?$7
//- groups are as follows:
//  1 == scheme
//  2 == userinfo
//  3 == host
//  4 == port
//  5,6 == path (5 if it has an authority, 6 if it doesn't)
//  7 == query
//  8 == fragment
//- A group that does not take part in the match is undefined in the result of
//  exec()/match() and is substituted as an empty string by replace().
//- Every "/" in the literal is written as "\/": an unescaped "/" outside a
//  character class terminates a JavaScript regex literal.
//- The scheme has to start with a letter (RFC 3986, section 3.1), so a string
//  such as "12:30" is not accepted as a URI.
//- The path without an authority is "one path character, then any number of
//  path characters or slashes". Repeating the first part with "+" would match
//  the same strings but backtrack quadratically on long invalid input.
//- The host class has no "[" or "]", so bracketed IP literals are rejected.
var regexUri = /^([a-z][a-z0-9+.-]*):(?:\/\/(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(?::(\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;
/*composed as follows:
    ^
    ([a-z][a-z0-9+.-]*):    #scheme (first character must be a letter)
    (?:
        \/\/    #it has an authority:
        (?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?    #userinfo
        ((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)    #host
        (?::(\d*))?    #port
        (\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?    #path
    |
        #it doesn't have an authority:
        (\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?    #path
    )
    (?:
        \?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)    #query string
    )?
    (?:
        #((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)    #fragment
    )?
    $
*/
//****************************************************//
//** Validate a URI (includes delimiters in groups) **//
//****************************************************//
//Validates a generic URI; each captured part keeps its own delimiter.
//- The different parts--along with their delimiters--are kept in their own
//  groups and can be recombined as $1$6$2$3$4$5$7$8$9
//- groups are as follows:
//  1,6 == scheme:// or scheme: (1 if it has an authority, 6 if it doesn't)
//  2 == userinfo@
//  3 == host
//  4 == :port
//  5,7 == path (5 if it has an authority, 7 if it doesn't)
//  8 == ?query
//  9 == #fragment
//- A group that does not take part in the match is undefined in the result of
//  exec()/match() and is substituted as an empty string by replace().
//- The scheme has to start with a letter (RFC 3986, section 3.1), so a string
//  such as "12:30" is not accepted as a URI.
//- The path without an authority is "one path character, then any number of
//  path characters or slashes". Repeating the first part with "+" would match
//  the same strings but backtrack quadratically on long invalid input.
var regexUriDelim = /^(?:([a-z][a-z0-9+.-]*:\/\/)((?:(?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(:(?:\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|([a-z][a-z0-9+.-]*:)(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(\?(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?(#(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?$/i;
//****************************************************//
//***************** Validate a URL *******************//
//****************************************************//
//Matches only URIs whose scheme is http or https.
//- Captured parts can be put back together as $1://$2:$3$4?$5#$6
//  (1 == scheme, 2 == host, 3 == port, 4 == path, 5 == query, 6 == fragment).
//- The host (domain) itself is not validated: it only has to be at least three
//  characters drawn from letters, digits, ".", "-" or percent-encoded octets,
//  so IPv6 and IPvFuture literals are not accepted.
//- The pattern is assembled from named pieces; the resulting expression is the
//  same, character for character, as the equivalent single regex literal.
var regexUrl = (function () {
  var pctEncoded = "%[0-9A-F]{2}";
  var hostChar = "(?:[a-z0-9.-]|" + pctEncoded + ")";
  var pathChar = "(?:[a-z0-9-._~!$&'()*+,;=:@]|" + pctEncoded + ")";
  var queryChar = "(?:[a-z0-9-._~!$&'()*+,;=:\\/?@]|" + pctEncoded + ")";

  var pieces = [
    "^(https?):\\/\\/",              //scheme
    "(" + hostChar + "{3,})",        //host
    "(?::(\\d+))?",                  //port
    "((?:\\/" + pathChar + "*)*)",   //path (zero or more "/segment")
    "(?:\\?(" + queryChar + "*))?",  //query
    "(?:#(" + queryChar + "*))?",    //fragment
    "$"
  ];

  return new RegExp(pieces.join(""), "i");
})();
//****************************************************//
//**************** Validate a Mailto *****************//
//****************************************************//
//Matches only URIs whose scheme is mailto.
//- Captured parts can be put back together as $1:$2?$3
//  (1 == scheme, 2 == path holding the address(es), 3 == query).
//- Only the allowed character set is checked; the email addresses themselves
//  are not validated.
//- The pattern is assembled from named pieces; the resulting expression is the
//  same, character for character, as the equivalent single regex literal.
var regexMailto = (function () {
  var pctEncoded = "%[0-9A-F]{2}";
  var pathChar = "(?:[a-z0-9-._~!$&'()*+,;=:@]|" + pctEncoded + ")";
  var queryChar = "(?:[a-z0-9-._~!$&'()*+,;=:\\/?@]|" + pctEncoded + ")";

  var pieces = [
    "^(mailto):",                    //scheme
    "(" + pathChar + "+)?",          //path
    "(?:\\?(" + queryChar + "*))?",  //query
    "$"
  ];

  return new RegExp(pieces.join(""), "i");
})();
//replace() can be used to parse the URI. For example, to get the path:
// path = uri.replace(regexUri, "$5$6");
//****************************************************//
//***************** Validate a URI *******************//
//****************************************************//
//Validates a generic URI and splits it into its components.
//- The different parts are kept in their own groups and can be recombined
//  depending on the scheme:
//  - http as $1://$3:$4$5?$7#$8
//  - ftp as $1://$2@$3:$4$5
//  - mailto as $1:$6?$7
//- groups are as follows:
//  1 == scheme
//  2 == userinfo
//  3 == host
//  4 == port
//  5,6 == path (5 if it has an authority, 6 if it doesn't)
//  7 == query
//  8 == fragment
//- A group that does not take part in the match is undefined in the result of
//  exec()/match() and is substituted as an empty string by replace().
//- Every "/" in the literal is written as "\/": an unescaped "/" outside a
//  character class terminates a JavaScript regex literal.
//- The scheme has to start with a letter (RFC 3986, section 3.1), so a string
//  such as "12:30" is not accepted as a URI.
//- The path without an authority is "one path character, then any number of
//  path characters or slashes". Repeating the first part with "+" would match
//  the same strings but backtrack quadratically on long invalid input.
//- The host class has no "[" or "]", so bracketed IP literals are rejected.
var regexUri = /^([a-z][a-z0-9+.-]*):(?:\/\/(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(?::(\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;
/*composed as follows:
    ^
    ([a-z][a-z0-9+.-]*):    #scheme (first character must be a letter)
    (?:
        \/\/    #it has an authority:
        (?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?    #userinfo
        ((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)    #host
        (?::(\d*))?    #port
        (\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?    #path
    |
        #it doesn't have an authority:
        (\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?    #path
    )
    (?:
        \?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)    #query string
    )?
    (?:
        #((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)    #fragment
    )?
    $
*/
//****************************************************//
//** Validate a URI (includes delimiters in groups) **//
//****************************************************//
//Validates a generic URI; each captured part keeps its own delimiter.
//- The different parts--along with their delimiters--are kept in their own
//  groups and can be recombined as $1$6$2$3$4$5$7$8$9
//- groups are as follows:
//  1,6 == scheme:// or scheme: (1 if it has an authority, 6 if it doesn't)
//  2 == userinfo@
//  3 == host
//  4 == :port
//  5,7 == path (5 if it has an authority, 7 if it doesn't)
//  8 == ?query
//  9 == #fragment
//- A group that does not take part in the match is undefined in the result of
//  exec()/match() and is substituted as an empty string by replace().
//- The scheme has to start with a letter (RFC 3986, section 3.1), so a string
//  such as "12:30" is not accepted as a URI.
//- The path without an authority is "one path character, then any number of
//  path characters or slashes". Repeating the first part with "+" would match
//  the same strings but backtrack quadratically on long invalid input.
var regexUriDelim = /^(?:([a-z][a-z0-9+.-]*:\/\/)((?:(?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(:(?:\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|([a-z][a-z0-9+.-]*:)(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(\?(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?(#(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?$/i;
//****************************************************//
//***************** Validate a URL *******************//
//****************************************************//
//Matches only URIs whose scheme is http or https.
//- Captured parts can be put back together as $1://$2:$3$4?$5#$6
//  (1 == scheme, 2 == host, 3 == port, 4 == path, 5 == query, 6 == fragment).
//- The host (domain) itself is not validated: it only has to be at least three
//  characters drawn from letters, digits, ".", "-" or percent-encoded octets,
//  so IPv6 and IPvFuture literals are not accepted.
//- The pattern is assembled from named pieces; the resulting expression is the
//  same, character for character, as the equivalent single regex literal.
var regexUrl = (function () {
  var pctEncoded = "%[0-9A-F]{2}";
  var hostChar = "(?:[a-z0-9.-]|" + pctEncoded + ")";
  var pathChar = "(?:[a-z0-9-._~!$&'()*+,;=:@]|" + pctEncoded + ")";
  var queryChar = "(?:[a-z0-9-._~!$&'()*+,;=:\\/?@]|" + pctEncoded + ")";

  var pieces = [
    "^(https?):\\/\\/",              //scheme
    "(" + hostChar + "{3,})",        //host
    "(?::(\\d+))?",                  //port
    "((?:\\/" + pathChar + "*)*)",   //path (zero or more "/segment")
    "(?:\\?(" + queryChar + "*))?",  //query
    "(?:#(" + queryChar + "*))?",    //fragment
    "$"
  ];

  return new RegExp(pieces.join(""), "i");
})();
//****************************************************//
//**************** Validate a Mailto *****************//
//****************************************************//
//Matches only URIs whose scheme is mailto.
//- Captured parts can be put back together as $1:$2?$3
//  (1 == scheme, 2 == path holding the address(es), 3 == query).
//- Only the allowed character set is checked; the email addresses themselves
//  are not validated.
//- The pattern is assembled from named pieces; the resulting expression is the
//  same, character for character, as the equivalent single regex literal.
var regexMailto = (function () {
  var pctEncoded = "%[0-9A-F]{2}";
  var pathChar = "(?:[a-z0-9-._~!$&'()*+,;=:@]|" + pctEncoded + ")";
  var queryChar = "(?:[a-z0-9-._~!$&'()*+,;=:\\/?@]|" + pctEncoded + ")";

  var pieces = [
    "^(mailto):",                    //scheme
    "(" + pathChar + "+)?",          //path
    "(?:\\?(" + queryChar + "*))?",  //query
    "$"
  ];

  return new RegExp(pieces.join(""), "i");
})();




There are currently no comments for this snippet.