Refactor regex to improve readability

- Split regex into components
- Add informative comments

Signed-off-by: RMidhunSuresh <rmidhunsuresh@gmail.com>
This commit is contained in:
RMidhunSuresh 2021-05-11 22:21:37 +05:30
parent 31740f4ec6
commit c6d7cef491

View file

@@ -1 +1,29 @@
/*
Matches http, https and ftp URLs inside arbitrary text
(flags: global, unicode, ignore-case).

After the scheme comes a run of host characters, then either
 - a path (/) or fragment (#) whose last character is neither
   whitespace nor one of . , ? ! (it may be non-ASCII), or
 - a single ASCII character that is neither whitespace nor one of . , ? !

The second alternative is a lookahead plus a positive ASCII range rather
than a negated class listing the non-ASCII range. With the "i" and "u"
flags a negated class also rejects every character that case-folds to one
of its members; U+212A (KELVIN SIGN) and U+017F (LONG S) fold to "k" and
"s", so URLs such as https://bbc.co.uk used to lose their last letter.

The "g" flag makes test() and exec() stateful through lastIndex.
*/
export const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:[\/#][^\s]*[^\s.,?!]|(?![\s.,?!])[\x00-\x7f])/gui;
/*
Only http, https and ftp URLs are recognised; the scheme must be
followed by "://".
*/
const scheme = "(?:https|http|ftp):\\/\\/";
/*
One character of the host part: an ASCII letter or digit, or one of
":" "." "[" "]" "-".
*/
const host = "[a-zA-Z0-9:.\\[\\]-]";
/*
A URL containing a path (/) or fragment (#) component
may end with any character that is neither whitespace
nor one of the punctuation marks . , ? !
The ending character may be non-ASCII.
*/
const end = "[^\\s.,?!]";
const additional = `[\\/#][^\\s]*${end}`;
/*
Similarly, a URL without a path or fragment must end with a character
that is neither whitespace nor punctuation, and that character must
also be ASCII.

This is a lookahead plus a positive ASCII range rather than a negated
class listing the non-ASCII range ([^\s\u{80}-\u{10ffff}.,?!]). With the
"i" and "u" flags a negated class also rejects every character that
case-folds to one of its members; U+212A (KELVIN SIGN) and U+017F (LONG S)
fold to "k" and "s", so URLs such as https://bbc.co.uk used to lose their
last letter.
*/
const endASCII = "(?![\\s.,?!])[\\x00-\\x7f]";
/*
A URL must not contain non-ASCII characters in the host but may contain
them in the path or fragment components.
https://matrix.org/<smiley> - matched in full
https://matrix.org<smiley> - only https://matrix.org is matched
*/
const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`;
/*
Global, unicode, case-insensitive matcher for URLs embedded in text.
Because of the "g" flag, test() and exec() advance lastIndex between
calls on this shared instance.
*/
export const regex = new RegExp(urlRegex, "gui");