Incorporate better regex from review
Signed-off-by: RMidhunSuresh <rmidhunsuresh@gmail.com>
This commit is contained in:
parent
00bcdbab37
commit
851e8d34a4
1 changed file with 11 additions and 14 deletions
|
@@ -4,24 +4,20 @@ meaning that any escapes (\) must also
|
||||||
be escaped.
|
be escaped.
|
||||||
*/
|
*/
|
||||||
const scheme = "(?:https|http|ftp):\\/\\/";
|
const scheme = "(?:https|http|ftp):\\/\\/";
|
||||||
const host = "[a-zA-Z0-9:.\\[\\]-]";
|
const noSpaceNorPunctuation = "[^\\s.,?!]";
|
||||||
|
const hostCharacter = "[a-zA-Z0-9:.\\[\\]-]";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
A URL containing path (/) or fragment (#) component
|
Using a non-consuming (lookahead) group here to combine two criteria for the last character.
|
||||||
is allowed to end with any character which is not
|
See point 1 below.
|
||||||
space nor punctuation. The ending character may be
|
|
||||||
non-ASCII.
|
|
||||||
*/
|
*/
|
||||||
const end = "[^\\s.,?!]";
|
const host = `${hostCharacter}*(?=${hostCharacter})${noSpaceNorPunctuation}`;
|
||||||
const additional = `[\\/#][^\\s]*${end}`;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Similarly, a URL not containing path or fragment must
|
Use sub groups so we accept just / or #; but if anything comes after it,
|
||||||
also end with a character that is not space nor punctuation.
|
it should not end with punctuation or space.
|
||||||
Additionally, the ending character must also be ASCII.
|
|
||||||
*/
|
*/
|
||||||
const nonASCII = "\\u{80}-\\u{10ffff}";
|
const pathOrFragment = `(?:[\\/#](?:[^\\s]*${noSpaceNorPunctuation})?)`;
|
||||||
const endASCII = `[^\\s${nonASCII}.,?!]`;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Things to keep in mind:
|
Things to keep in mind:
|
||||||
|
@@ -30,7 +26,8 @@ Things to keep in mind:
|
||||||
https://matrix.org/<smiley> - valid
|
https://matrix.org/<smiley> - valid
|
||||||
https://matrix.org<smiley> - invalid
|
https://matrix.org<smiley> - invalid
|
||||||
2. Do not treat punctuation at the end as a part of the URL (.,?!)
|
2. Do not treat punctuation at the end as a part of the URL (.,?!)
|
||||||
|
3. Path/fragment is optional.
|
||||||
*/
|
*/
|
||||||
const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`;
|
const urlRegex = `${scheme}${host}${pathOrFragment}?`;
|
||||||
|
|
||||||
export const regex = new RegExp(urlRegex, "gui");
|
export const regex = new RegExp(urlRegex, "gi");
|
||||||
|
|
Reference in a new issue