From 3c46a07a1e4bc04acc752379b237852dc0be9180 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Sat, 8 May 2021 19:02:44 +0530 Subject: [PATCH 01/35] Implement object format to represent chat messages Every chat text message can be split into parts such as text, newline and links. Signed-off-by: RMidhunSuresh --- src/domain/MessageObjectFormat.js | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/domain/MessageObjectFormat.js diff --git a/src/domain/MessageObjectFormat.js b/src/domain/MessageObjectFormat.js new file mode 100644 index 00000000..461856b4 --- /dev/null +++ b/src/domain/MessageObjectFormat.js @@ -0,0 +1,24 @@ +export class MessageObjectFormat { + + constructor(message = []) { + this._root = message; + } + + insertText(text) { + if (text.length) + this._root.push({ type: "text", text: text }); + } + + insertLink(link, displayText) { + this._root.push({ type: "link", url: link, text: displayText }); + } + + insertNewline() { + this._root.push({ type: "newline" }); + } + + [Symbol.iterator]() { + return this._root.values(); + } + +} From 32fd1eb318c81b5b0b5c1e68a1f2a929ebb0d20c Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Sat, 8 May 2021 19:17:43 +0530 Subject: [PATCH 02/35] Implement linkifier Signed-off-by: RMidhunSuresh --- src/domain/Linkifier.js | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/domain/Linkifier.js diff --git a/src/domain/Linkifier.js b/src/domain/Linkifier.js new file mode 100644 index 00000000..a429c8d3 --- /dev/null +++ b/src/domain/Linkifier.js @@ -0,0 +1,57 @@ +import { MessageObjectFormat } from "./MessageObjectFormat.js"; + +export class Linkifier { + + /** + * @param {String} text Text to linkify + */ + constructor(text) { + this._text = text; + this._curr = 0; + this._message = new MessageObjectFormat(); + } + + /** + * Separate string into text, newlines and add them into message object. 
+ * @param {String} text + */ + _addTextToMessage(text) { + const components = text.split("\n"); + components.slice(0, -1).forEach(t => { + this._message.insertText(t); + this._message.insertNewline(); + }); + const [last] = components.slice(-1); + this._message.insertText(last); + } + + /** + * Add text from this._curr upto start of supplied match into message object. + * If match is not provided, everything from this._curr to the end of + * this._text is added as text to the message object. + * @param {Array} [match] regex match + */ + _handleText(match) { + const index = match?.index; + const text = this._text.slice(this._curr, index); + this._addTextToMessage(text); + const len = match?.[0].length; + this._curr = index + len; + } + + /** + * Splits message text into parts (text, newline and links) + * @returns {MessageObjectFormat} Object representation of chat message + */ + linkify() { + const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui + const matches = this._text.matchAll(regex); + for (let match of matches) { + const link = match[0]; + this._handleText(match); + this._message.insertLink(link, link); + } + this._handleText(); + return this._message; + } +} From 9ca0d93a01d8486f08f301bb67bcd163f26a78a6 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Sat, 8 May 2021 19:18:45 +0530 Subject: [PATCH 03/35] Add tests for linkify Signed-off-by: RMidhunSuresh --- src/domain/Linkifier.js | 116 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/src/domain/Linkifier.js b/src/domain/Linkifier.js index a429c8d3..0074e58c 100644 --- a/src/domain/Linkifier.js +++ b/src/domain/Linkifier.js @@ -55,3 +55,119 @@ export class Linkifier { return this._message; } } + +export function tests() { + + function linkify(text) { + const obj = new Linkifier(text); + return obj.linkify(); + } + + function test(assert, input, output) { + output = new MessageObjectFormat(output); + input = 
linkify(input); + assert.deepEqual(input, output); + } + + function testLink(assert, link, expectFail = false) { + const input = link; + const output = expectFail ? [{ type: "text", text: input }] : + [{ type: "link", url: input, text: input }]; + test(assert, input, output); + } + + return { + // Tests for text + "Text only": assert => { + const input = "This is a sentence"; + const output = [{ type: "text", text: input }]; + test(assert, input, output); + }, + + "Text with newline": assert => { + const input = "This is a sentence.\nThis is another sentence."; + const output = [ + { type: "text", text: "This is a sentence." }, + { type: "newline" }, + { type: "text", text: "This is another sentence." } + ]; + test(assert, input, output); + }, + + "Text with newline & trailing newline": assert => { + const input = "This is a sentence.\nThis is another sentence.\n"; + const output = [ + { type: "text", text: "This is a sentence." }, + { type: "newline" }, + { type: "text", text: "This is another sentence." 
}, + { type: "newline" } + ]; + test(assert, input, output); + }, + + // Tests for links + "Link with host": assert => { + testLink(assert, "https://matrix.org"); + }, + + "Link with host & path": assert => { + testLink(assert, "https://matrix.org/docs/develop"); + }, + + "Link with host & fragment": assert => { + testLink(assert, "https://matrix.org#test"); + }, + + "Link with host & query": assert => { + testLink(assert, "https://matrix.org/?foo=bar"); + }, + + "Complex link": assert => { + const link = "https://www.foobar.com/url?sa=t&rct=j&q=&esrc=s&source" + + "=web&cd=&cad=rja&uact=8&ved=2ahUKEwjyu7DJ-LHwAhUQyzgGHc" + + "OKA70QFjAAegQIBBAD&url=https%3A%2F%2Fmatrix.org%2Fdocs%" + + "2Fprojects%2Fclient%2Felement%2F&usg=AOvVaw0xpENrPHv_R-" + + "ERkyacR2Bd"; + testLink(assert, link); + }, + + "Localhost link": assert => { + testLink(assert, "http://localhost"); + testLink(assert, "http://localhost:3000"); + }, + + "IPV4 link": assert => { + testLink(assert, "https://192.0.0.1"); + testLink(assert, "https://250.123.67.23:5924"); + }, + + "IPV6 link": assert => { + testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"); + testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:7000"); + }, + + "Missing scheme must not linkify": assert => { + testLink(assert, "matrix.org/foo/bar", true); + }, + + "Punctuation at end of link must not linkify": assert => { + const link = "https://foo.bar/?nenjil=lal810"; + const end = ".,? 
"; + for (const char of end) { + const out = [{ type: "link", url: link, text: link }, { type: "text", text: char }]; + test(assert, link + char, out); + } + }, + + "Unicode in hostname must not linkify": assert => { + const link = "https://foo.bar\uD83D\uDE03.com"; + const out = [{ type: "link", url: "https://foo.bar", text: "https://foo.bar" }, + { type: "text", text: "\uD83D\uDE03.com" }]; + test(assert, link, out); + }, + + "Link with unicode only after / must linkify": assert => { + testLink(assert, "https://foo.bar.com/\uD83D\uDE03"); + } + }; +} From 2f883d049cb85dd5955d7d49d6e918f382454408 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Sat, 8 May 2021 19:20:07 +0530 Subject: [PATCH 04/35] Return new message format instead of message text Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/tiles/TextTile.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index 8f5265d4..fc8db31e 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -15,15 +15,16 @@ limitations under the License. 
*/ import {MessageTile} from "./MessageTile.js"; +import { Linkifier } from "../../../../Linkifier.js"; export class TextTile extends MessageTile { - get text() { + get messageFormat() { const content = this._getContent(); - const body = content && content.body; + let body = content && content.body; if (content.msgtype === "m.emote") { - return `* ${this.displayName} ${body}`; - } else { - return body; + body = `* ${this.displayName} ${body}`; } + const obj = new Linkifier(body); + return obj.linkify(); } } From bbeb0a5764debbfb4b4ae4714b9afd0690a9437a Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Sat, 8 May 2021 19:22:49 +0530 Subject: [PATCH 05/35] Transform new message format into DOM elements Signed-off-by: RMidhunSuresh --- .../session/room/timeline/TextMessageView.js | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/platform/web/ui/session/room/timeline/TextMessageView.js b/src/platform/web/ui/session/room/timeline/TextMessageView.js index 675b0035..e13ea037 100644 --- a/src/platform/web/ui/session/room/timeline/TextMessageView.js +++ b/src/platform/web/ui/session/room/timeline/TextMessageView.js @@ -21,7 +21,7 @@ import {renderMessage} from "./common.js"; export class TextMessageView extends TemplateView { render(t, vm) { - const bodyView = t.mapView(vm => vm.text, text => new BodyView(text)); + const bodyView = t.mapView(vm => vm.messageFormat, messageFormat => new BodyView(messageFormat)); return renderMessage(t, vm, [t.p([bodyView, t.time({className: {hidden: !vm.date}}, vm.date + " " + vm.time)])] ); @@ -29,20 +29,22 @@ export class TextMessageView extends TemplateView { } class BodyView extends StaticView { + + get _formatFunction() { + return { + text: (param) => text(param.obj.text), + link: (param) => param.t.a({ href: param.obj.url, target: "_blank", rel: "noopener" }, [text(param.obj.text)]), + newline: (param) => param.t.br() + }; + } + render(t, value) { - const lines = (value || "").split("\n"); - 
if (lines.length === 1) { - return text(lines[0]); + const children = []; + for (const m of value) { + const f = this._formatFunction[m.type]; + const element = f({ obj: m, t: t }); + children.push(element); } - const elements = []; - for (const line of lines) { - if (elements.length) { - elements.push(t.br()); - } - if (line.length) { - elements.push(t.span(line)); - } - } - return t.span(elements); + return t.span(children); } } From 06a6cae4afd6dce5b0517b0cc9ce6e3c9ca92593 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Mon, 10 May 2021 19:50:26 +0530 Subject: [PATCH 06/35] Rename MessageObjectFormat to MessageBodyBuilder Signed-off-by: RMidhunSuresh --- src/domain/Linkifier.js | 8 ++++---- .../{MessageObjectFormat.js => MessageBodyBuilder.js} | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) rename src/domain/{MessageObjectFormat.js => MessageBodyBuilder.js} (92%) diff --git a/src/domain/Linkifier.js b/src/domain/Linkifier.js index 0074e58c..25d1adac 100644 --- a/src/domain/Linkifier.js +++ b/src/domain/Linkifier.js @@ -1,4 +1,4 @@ -import { MessageObjectFormat } from "./MessageObjectFormat.js"; +import { MessageBodyBuilder } from "./MessageBodyBuilder.js"; export class Linkifier { @@ -8,7 +8,7 @@ export class Linkifier { constructor(text) { this._text = text; this._curr = 0; - this._message = new MessageObjectFormat(); + this._message = new MessageBodyBuilder(); } /** @@ -41,7 +41,7 @@ export class Linkifier { /** * Splits message text into parts (text, newline and links) - * @returns {MessageObjectFormat} Object representation of chat message + * @returns {MessageBodyBuilder} Object representation of chat message */ linkify() { const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui @@ -64,7 +64,7 @@ export function tests() { } function test(assert, input, output) { - output = new MessageObjectFormat(output); + output = new MessageBodyBuilder(output); input = linkify(input); 
assert.deepEqual(input, output); } diff --git a/src/domain/MessageObjectFormat.js b/src/domain/MessageBodyBuilder.js similarity index 92% rename from src/domain/MessageObjectFormat.js rename to src/domain/MessageBodyBuilder.js index 461856b4..2ed8ed47 100644 --- a/src/domain/MessageObjectFormat.js +++ b/src/domain/MessageBodyBuilder.js @@ -1,4 +1,4 @@ -export class MessageObjectFormat { +export class MessageBodyBuilder { constructor(message = []) { this._root = message; From dca649dfd41d48b1126dfb48e89cdeb80055ae44 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Mon, 10 May 2021 19:53:40 +0530 Subject: [PATCH 07/35] Moved files into more appropriate locations Signed-off-by: RMidhunSuresh --- src/domain/{ => session/room/timeline}/Linkifier.js | 0 src/domain/{ => session/room/timeline}/MessageBodyBuilder.js | 0 src/domain/session/room/timeline/tiles/TextTile.js | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename src/domain/{ => session/room/timeline}/Linkifier.js (100%) rename src/domain/{ => session/room/timeline}/MessageBodyBuilder.js (100%) diff --git a/src/domain/Linkifier.js b/src/domain/session/room/timeline/Linkifier.js similarity index 100% rename from src/domain/Linkifier.js rename to src/domain/session/room/timeline/Linkifier.js diff --git a/src/domain/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js similarity index 100% rename from src/domain/MessageBodyBuilder.js rename to src/domain/session/room/timeline/MessageBodyBuilder.js diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index fc8db31e..e72492c9 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -15,7 +15,7 @@ limitations under the License. 
*/ import {MessageTile} from "./MessageTile.js"; -import { Linkifier } from "../../../../Linkifier.js"; +import { Linkifier } from "../Linkifier.js"; export class TextTile extends MessageTile { get messageFormat() { From 787d438a746a313da65150d28437040d20c1c100 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Mon, 10 May 2021 21:30:11 +0530 Subject: [PATCH 08/35] Refactored Linkifier class into single function Modified design so that linkify interacts with MessageBodyBuilder through callbacks. Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/Linkifier.js | 173 ------------------ src/domain/session/room/timeline/linkify.js | 12 ++ 2 files changed, 12 insertions(+), 173 deletions(-) delete mode 100644 src/domain/session/room/timeline/Linkifier.js create mode 100644 src/domain/session/room/timeline/linkify.js diff --git a/src/domain/session/room/timeline/Linkifier.js b/src/domain/session/room/timeline/Linkifier.js deleted file mode 100644 index 25d1adac..00000000 --- a/src/domain/session/room/timeline/Linkifier.js +++ /dev/null @@ -1,173 +0,0 @@ -import { MessageBodyBuilder } from "./MessageBodyBuilder.js"; - -export class Linkifier { - - /** - * @param {String} text Text to linkify - */ - constructor(text) { - this._text = text; - this._curr = 0; - this._message = new MessageBodyBuilder(); - } - - /** - * Separate string into text, newlines and add them into message object. - * @param {String} text - */ - _addTextToMessage(text) { - const components = text.split("\n"); - components.slice(0, -1).forEach(t => { - this._message.insertText(t); - this._message.insertNewline(); - }); - const [last] = components.slice(-1); - this._message.insertText(last); - } - - /** - * Add text from this._curr upto start of supplied match into message object. - * If match is not provided, everything from this._curr to the end of - * this._text is added as text to the message object. 
- * @param {Array} [match] regex match - */ - _handleText(match) { - const index = match?.index; - const text = this._text.slice(this._curr, index); - this._addTextToMessage(text); - const len = match?.[0].length; - this._curr = index + len; - } - - /** - * Splits message text into parts (text, newline and links) - * @returns {MessageBodyBuilder} Object representation of chat message - */ - linkify() { - const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui - const matches = this._text.matchAll(regex); - for (let match of matches) { - const link = match[0]; - this._handleText(match); - this._message.insertLink(link, link); - } - this._handleText(); - return this._message; - } -} - -export function tests() { - - function linkify(text) { - const obj = new Linkifier(text); - return obj.linkify(); - } - - function test(assert, input, output) { - output = new MessageBodyBuilder(output); - input = linkify(input); - assert.deepEqual(input, output); - } - - function testLink(assert, link, expectFail = false) { - const input = link; - const output = expectFail ? [{ type: "text", text: input }] : - [{ type: "link", url: input, text: input }]; - test(assert, input, output); - } - - return { - // Tests for text - "Text only": assert => { - const input = "This is a sentence"; - const output = [{ type: "text", text: input }]; - test(assert, input, output); - }, - - "Text with newline": assert => { - const input = "This is a sentence.\nThis is another sentence."; - const output = [ - { type: "text", text: "This is a sentence." }, - { type: "newline" }, - { type: "text", text: "This is another sentence." } - ]; - test(assert, input, output); - }, - - "Text with newline & trailing newline": assert => { - const input = "This is a sentence.\nThis is another sentence.\n"; - const output = [ - { type: "text", text: "This is a sentence." }, - { type: "newline" }, - { type: "text", text: "This is another sentence." 
}, - { type: "newline" } - ]; - test(assert, input, output); - }, - - // Tests for links - "Link with host": assert => { - testLink(assert, "https://matrix.org"); - }, - - "Link with host & path": assert => { - testLink(assert, "https://matrix.org/docs/develop"); - }, - - "Link with host & fragment": assert => { - testLink(assert, "https://matrix.org#test"); - }, - - "Link with host & query": assert => { - testLink(assert, "https://matrix.org/?foo=bar"); - }, - - "Complex link": assert => { - const link = "https://www.foobar.com/url?sa=t&rct=j&q=&esrc=s&source" + - "=web&cd=&cad=rja&uact=8&ved=2ahUKEwjyu7DJ-LHwAhUQyzgGHc" + - "OKA70QFjAAegQIBBAD&url=https%3A%2F%2Fmatrix.org%2Fdocs%" + - "2Fprojects%2Fclient%2Felement%2F&usg=AOvVaw0xpENrPHv_R-" + - "ERkyacR2Bd"; - testLink(assert, link); - }, - - "Localhost link": assert => { - testLink(assert, "http://localhost"); - testLink(assert, "http://localhost:3000"); - }, - - "IPV4 link": assert => { - testLink(assert, "https://192.0.0.1"); - testLink(assert, "https://250.123.67.23:5924"); - }, - - "IPV6 link": assert => { - testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"); - testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:7000"); - }, - - "Missing scheme must not linkify": assert => { - testLink(assert, "matrix.org/foo/bar", true); - }, - - "Punctuation at end of link must not linkify": assert => { - const link = "https://foo.bar/?nenjil=lal810"; - const end = ".,? 
"; - for (const char of end) { - const out = [{ type: "link", url: link, text: link }, { type: "text", text: char }]; - test(assert, link + char, out); - } - }, - - "Unicode in hostname must not linkify": assert => { - const link = "https://foo.bar\uD83D\uDE03.com"; - const out = [{ type: "link", url: "https://foo.bar", text: "https://foo.bar" }, - { type: "text", text: "\uD83D\uDE03.com" }]; - test(assert, link, out); - }, - - "Link with unicode only after / must linkify": assert => { - testLink(assert, "https://foo.bar.com/\uD83D\uDE03"); - } - }; -} diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify.js new file mode 100644 index 00000000..01dc8a4b --- /dev/null +++ b/src/domain/session/room/timeline/linkify.js @@ -0,0 +1,12 @@ +export function linkify(text, callback) { + const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui + const matches = text.matchAll(regex); + let curr = 0; + for (let match of matches) { + callback(match[0], true); + callback(text.slice(curr, match.index), false); + const len = match[0].length; + curr = match.index + len; + } + callback(text.slice(curr), false); +} From 55f2d79d077a83eeecb0bc34ab16cc222d5df57d Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Mon, 10 May 2021 21:37:27 +0530 Subject: [PATCH 09/35] Move newline handling to MessageBodyBuilder - Also add a insert method to reflect new design. 
Signed-off-by: RMidhunSuresh --- .../room/timeline/MessageBodyBuilder.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index 2ed8ed47..3b297cbd 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -1,9 +1,28 @@ +import { linkify } from "./linkify.js"; + export class MessageBodyBuilder { constructor(message = []) { this._root = message; } + fromText(text) { + const components = text.split("\n"); + components.slice(0, -1).forEach(t => { + linkify(t, this.insert.bind(this)); + }); + const [last] = components.slice(-1); + linkify(last, this.insert.bind(this)); + } + + insert(text, isLink) { + if (!text.length) return; + if (isLink) + this.insertLink(text, text); + else + this.insertText(text); + } + insertText(text) { if (text.length) this._root.push({ type: "text", text: text }); From fc3b391aa929eee784b4827d75952df5890fdc26 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Mon, 10 May 2021 21:41:07 +0530 Subject: [PATCH 10/35] Use MessageBodyBuilder instead of linkify Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/tiles/TextTile.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index e72492c9..1f940970 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -15,7 +15,7 @@ limitations under the License. 
*/ import {MessageTile} from "./MessageTile.js"; -import { Linkifier } from "../Linkifier.js"; +import { MessageBodyBuilder } from "../MessageBodyBuilder.js"; export class TextTile extends MessageTile { get messageFormat() { @@ -24,7 +24,8 @@ export class TextTile extends MessageTile { if (content.msgtype === "m.emote") { body = `* ${this.displayName} ${body}`; } - const obj = new Linkifier(body); - return obj.linkify(); + const message = new MessageBodyBuilder(); + message.fromText(body); + return message; } } From 17be7c4f4d03f48548c0108a509a470448825a3e Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Mon, 10 May 2021 22:10:50 +0530 Subject: [PATCH 11/35] Add newline to message Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/MessageBodyBuilder.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index 3b297cbd..c1dc9222 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -10,6 +10,7 @@ export class MessageBodyBuilder { const components = text.split("\n"); components.slice(0, -1).forEach(t => { linkify(t, this.insert.bind(this)); + this.insertNewline(); }); const [last] = components.slice(-1); linkify(last, this.insert.bind(this)); From 03b971d8987616f529576bbb792542bb3021f844 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 12:55:39 +0530 Subject: [PATCH 12/35] Added tests for new linkify Signed-off-by: RMidhunSuresh --- .../room/timeline/MessageBodyBuilder.js | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index c1dc9222..7c4f7a56 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -42,3 +42,120 @@ export class 
MessageBodyBuilder { } } + +export function tests() { + + function linkify(text) { + const obj = new MessageBodyBuilder(); + obj.fromText(text); + return obj; + } + + function test(assert, input, output) { + output = new MessageBodyBuilder(output); + input = linkify(input); + assert.deepEqual(input, output); + } + + function testLink(assert, link, expectFail = false) { + const input = link; + const output = expectFail ? [{ type: "text", text: input }] : + [{ type: "link", url: input, text: input }]; + test(assert, input, output); + } + + return { + // Tests for text + "Text only": assert => { + const input = "This is a sentence"; + const output = [{ type: "text", text: input }]; + test(assert, input, output); + }, + + "Text with newline": assert => { + const input = "This is a sentence.\nThis is another sentence."; + const output = [ + { type: "text", text: "This is a sentence." }, + { type: "newline" }, + { type: "text", text: "This is another sentence." } + ]; + test(assert, input, output); + }, + + "Text with newline & trailing newline": assert => { + const input = "This is a sentence.\nThis is another sentence.\n"; + const output = [ + { type: "text", text: "This is a sentence." }, + { type: "newline" }, + { type: "text", text: "This is another sentence." 
}, + { type: "newline" } + ]; + test(assert, input, output); + }, + + // Tests for links + "Link with host": assert => { + testLink(assert, "https://matrix.org"); + }, + + "Link with host & path": assert => { + testLink(assert, "https://matrix.org/docs/develop"); + }, + + "Link with host & fragment": assert => { + testLink(assert, "https://matrix.org#test"); + }, + + "Link with host & query": assert => { + testLink(assert, "https://matrix.org/?foo=bar"); + }, + + "Complex link": assert => { + const link = "https://www.foobar.com/url?sa=t&rct=j&q=&esrc=s&source" + + "=web&cd=&cad=rja&uact=8&ved=2ahUKEwjyu7DJ-LHwAhUQyzgGHc" + + "OKA70QFjAAegQIBBAD&url=https%3A%2F%2Fmatrix.org%2Fdocs%" + + "2Fprojects%2Fclient%2Felement%2F&usg=AOvVaw0xpENrPHv_R-" + + "ERkyacR2Bd"; + testLink(assert, link); + }, + + "Localhost link": assert => { + testLink(assert, "http://localhost"); + testLink(assert, "http://localhost:3000"); + }, + + "IPV4 link": assert => { + testLink(assert, "https://192.0.0.1"); + testLink(assert, "https://250.123.67.23:5924"); + }, + + "IPV6 link": assert => { + testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"); + testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:7000"); + }, + + "Missing scheme must not linkify": assert => { + testLink(assert, "matrix.org/foo/bar", true); + }, + + "Punctuation at end of link must not linkify": assert => { + const link = "https://foo.bar/?nenjil=lal810"; + const end = ".,? 
"; + for (const char of end) { + const out = [{ type: "link", url: link, text: link }, { type: "text", text: char }]; + test(assert, link + char, out); + } + }, + + "Unicode in hostname must not linkify": assert => { + const link = "https://foo.bar\uD83D\uDE03.com"; + const out = [{ type: "link", url: "https://foo.bar", text: "https://foo.bar" }, + { type: "text", text: "\uD83D\uDE03.com" }]; + test(assert, link, out); + }, + + "Link with unicode only after / must linkify": assert => { + testLink(assert, "https://foo.bar.com/\uD83D\uDE03"); + } + }; +} From 577883a1d4f5fdbb40f20885dd460caeab8594e1 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 13:31:33 +0530 Subject: [PATCH 13/35] Refactor loop Don't handle last element separately Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/MessageBodyBuilder.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index 7c4f7a56..2831fdd0 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -8,12 +8,12 @@ export class MessageBodyBuilder { fromText(text) { const components = text.split("\n"); - components.slice(0, -1).forEach(t => { - linkify(t, this.insert.bind(this)); - this.insertNewline(); + components.flatMap(e => ["\n", e]).slice(1).forEach(e => { + if (e === "\n") + this.insertNewline(); + else + linkify(e, this.insert.bind(this)); }); - const [last] = components.slice(-1); - linkify(last, this.insert.bind(this)); } insert(text, isLink) { From 070bc961868f1005ccabd14dc40d3db39c9e9b58 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 13:43:54 +0530 Subject: [PATCH 14/35] Fallback to empty string if content is undefined Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/tiles/TextTile.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index 1f940970..5a1bdb78 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -20,7 +20,7 @@ import { MessageBodyBuilder } from "../MessageBodyBuilder.js"; export class TextTile extends MessageTile { get messageFormat() { const content = this._getContent(); - let body = content && content.body; + let body = content?.body || ""; if (content.msgtype === "m.emote") { body = `* ${this.displayName} ${body}`; } From 3beaf88a8b9d51cd9cd4c91eea360d8db924d541 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 13:46:38 +0530 Subject: [PATCH 15/35] Rename messageFormat to body Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/tiles/TextTile.js | 2 +- src/platform/web/ui/session/room/timeline/TextMessageView.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index 5a1bdb78..ba898963 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -18,7 +18,7 @@ import {MessageTile} from "./MessageTile.js"; import { MessageBodyBuilder } from "../MessageBodyBuilder.js"; export class TextTile extends MessageTile { - get messageFormat() { + get body() { const content = this._getContent(); let body = content?.body || ""; if (content.msgtype === "m.emote") { diff --git a/src/platform/web/ui/session/room/timeline/TextMessageView.js b/src/platform/web/ui/session/room/timeline/TextMessageView.js index e13ea037..d78c49da 100644 --- a/src/platform/web/ui/session/room/timeline/TextMessageView.js +++ b/src/platform/web/ui/session/room/timeline/TextMessageView.js @@ -21,7 +21,7 @@ import {renderMessage} from "./common.js"; export class TextMessageView extends TemplateView { render(t, vm) { - 
const bodyView = t.mapView(vm => vm.messageFormat, messageFormat => new BodyView(messageFormat)); + const bodyView = t.mapView(vm => vm.body, body => new BodyView(body)); return renderMessage(t, vm, [t.p([bodyView, t.time({className: {hidden: !vm.date}}, vm.date + " " + vm.time)])] ); From 051771397cbab7b78f2b8fdcc6b052dec1db9c64 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 13:59:45 +0530 Subject: [PATCH 16/35] Fix bug in which links preceded text Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify.js index 01dc8a4b..628d8c68 100644 --- a/src/domain/session/room/timeline/linkify.js +++ b/src/domain/session/room/timeline/linkify.js @@ -3,8 +3,8 @@ export function linkify(text, callback) { const matches = text.matchAll(regex); let curr = 0; for (let match of matches) { - callback(match[0], true); callback(text.slice(curr, match.index), false); + callback(match[0], true); const len = match[0].length; curr = match.index + len; } From e4a143659ce5d470bfb0aab816d4dc99c23e8d74 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 14:02:25 +0530 Subject: [PATCH 17/35] Refactor unwanted getter into constant object Signed-off-by: RMidhunSuresh --- .../ui/session/room/timeline/TextMessageView.js | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/platform/web/ui/session/room/timeline/TextMessageView.js b/src/platform/web/ui/session/room/timeline/TextMessageView.js index d78c49da..c07c1075 100644 --- a/src/platform/web/ui/session/room/timeline/TextMessageView.js +++ b/src/platform/web/ui/session/room/timeline/TextMessageView.js @@ -28,20 +28,17 @@ export class TextMessageView extends TemplateView { } } +const formatFunction = { + text: (param) => text(param.obj.text), + link: (param) => param.t.a({ href: param.obj.url, target: 
"_blank", rel: "noopener" }, [text(param.obj.text)]), + newline: (param) => param.t.br() +}; + class BodyView extends StaticView { - - get _formatFunction() { - return { - text: (param) => text(param.obj.text), - link: (param) => param.t.a({ href: param.obj.url, target: "_blank", rel: "noopener" }, [text(param.obj.text)]), - newline: (param) => param.t.br() - }; - } - render(t, value) { const children = []; for (const m of value) { - const f = this._formatFunction[m.type]; + const f = formatFunction[m.type]; const element = f({ obj: m, t: t }); children.push(element); } From 86f4b6186a1d15d3cc9de2ecd6fdc68866d3104d Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 15:03:20 +0530 Subject: [PATCH 18/35] Refactor functions to accept single parameter Signed-off-by: RMidhunSuresh --- .../web/ui/session/room/timeline/TextMessageView.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/platform/web/ui/session/room/timeline/TextMessageView.js b/src/platform/web/ui/session/room/timeline/TextMessageView.js index c07c1075..b9eda412 100644 --- a/src/platform/web/ui/session/room/timeline/TextMessageView.js +++ b/src/platform/web/ui/session/room/timeline/TextMessageView.js @@ -16,7 +16,7 @@ limitations under the License. 
import {TemplateView} from "../../../general/TemplateView.js"; import {StaticView} from "../../../general/StaticView.js"; -import {text} from "../../../general/html.js"; +import { tag, text } from "../../../general/html.js"; import {renderMessage} from "./common.js"; export class TextMessageView extends TemplateView { @@ -29,9 +29,9 @@ export class TextMessageView extends TemplateView { } const formatFunction = { - text: (param) => text(param.obj.text), - link: (param) => param.t.a({ href: param.obj.url, target: "_blank", rel: "noopener" }, [text(param.obj.text)]), - newline: (param) => param.t.br() + text: (m) => text(m.text), + link: (m) => tag.a({ href: m.url, target: "_blank", rel: "noopener" }, [text(m.text)]), + newline: () => tag.br() }; class BodyView extends StaticView { @@ -39,7 +39,7 @@ class BodyView extends StaticView { const children = []; for (const m of value) { const f = formatFunction[m.type]; - const element = f({ obj: m, t: t }); + const element = f(m); children.push(element); } return t.span(children); From 3830a95af22bbdc2b37ffc4dc52ddb36fbaa9a6e Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 16:03:14 +0530 Subject: [PATCH 19/35] Cache result in TextTile Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/tiles/TextTile.js | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index ba898963..34089b81 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -18,14 +18,22 @@ import {MessageTile} from "./MessageTile.js"; import { MessageBodyBuilder } from "../MessageBodyBuilder.js"; export class TextTile extends MessageTile { - get body() { + + get _contentBody() { const content = this._getContent(); let body = content?.body || ""; - if (content.msgtype === "m.emote") { + if (content.msgtype === "m.emote") body = `* 
${this.displayName} ${body}`; - } + return body; + } + + get body() { + const body = this._contentBody; + if (body === this._body) + return this._message; const message = new MessageBodyBuilder(); message.fromText(body); + [this._body, this._message] = [body, message]; return message; } } From dbe76e973e9efd668b649c4cca271ad083ac622d Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 17:59:03 +0530 Subject: [PATCH 20/35] Add braces to all blocks Signed-off-by: RMidhunSuresh --- .../room/timeline/MessageBodyBuilder.js | 19 +++++++++++++------ .../session/room/timeline/tiles/TextTile.js | 6 ++++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index 2831fdd0..4d5c73e7 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -9,24 +9,31 @@ export class MessageBodyBuilder { fromText(text) { const components = text.split("\n"); components.flatMap(e => ["\n", e]).slice(1).forEach(e => { - if (e === "\n") + if (e === "\n") { this.insertNewline(); - else + } + else { linkify(e, this.insert.bind(this)); + } }); } insert(text, isLink) { - if (!text.length) return; - if (isLink) + if (!text.length) { + return; + } + if (isLink) { this.insertLink(text, text); - else + } + else { this.insertText(text); + } } insertText(text) { - if (text.length) + if (text.length) { this._root.push({ type: "text", text: text }); + } } insertLink(link, displayText) { diff --git a/src/domain/session/room/timeline/tiles/TextTile.js b/src/domain/session/room/timeline/tiles/TextTile.js index 34089b81..88e281d2 100644 --- a/src/domain/session/room/timeline/tiles/TextTile.js +++ b/src/domain/session/room/timeline/tiles/TextTile.js @@ -22,15 +22,17 @@ export class TextTile extends MessageTile { get _contentBody() { const content = this._getContent(); let body = content?.body || ""; - if 
(content.msgtype === "m.emote") + if (content.msgtype === "m.emote") { body = `* ${this.displayName} ${body}`; + } return body; } get body() { const body = this._contentBody; - if (body === this._body) + if (body === this._body) { return this._message; + } const message = new MessageBodyBuilder(); message.fromText(body); [this._body, this._message] = [body, message]; From 03208b88d5d190ee55c45e3f63080b76d545c527 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 20:19:29 +0530 Subject: [PATCH 21/35] Move link tests to linkify Signed-off-by: RMidhunSuresh --- .../room/timeline/MessageBodyBuilder.js | 72 -------------- src/domain/session/room/timeline/linkify.js | 95 +++++++++++++++++++ 2 files changed, 95 insertions(+), 72 deletions(-) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index 4d5c73e7..edaa4c83 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -64,13 +64,6 @@ export function tests() { assert.deepEqual(input, output); } - function testLink(assert, link, expectFail = false) { - const input = link; - const output = expectFail ? 
[{ type: "text", text: input }] : - [{ type: "link", url: input, text: input }]; - test(assert, input, output); - } - return { // Tests for text "Text only": assert => { @@ -98,71 +91,6 @@ export function tests() { { type: "newline" } ]; test(assert, input, output); - }, - - // Tests for links - "Link with host": assert => { - testLink(assert, "https://matrix.org"); - }, - - "Link with host & path": assert => { - testLink(assert, "https://matrix.org/docs/develop"); - }, - - "Link with host & fragment": assert => { - testLink(assert, "https://matrix.org#test"); - }, - - "Link with host & query": assert => { - testLink(assert, "https://matrix.org/?foo=bar"); - }, - - "Complex link": assert => { - const link = "https://www.foobar.com/url?sa=t&rct=j&q=&esrc=s&source" + - "=web&cd=&cad=rja&uact=8&ved=2ahUKEwjyu7DJ-LHwAhUQyzgGHc" + - "OKA70QFjAAegQIBBAD&url=https%3A%2F%2Fmatrix.org%2Fdocs%" + - "2Fprojects%2Fclient%2Felement%2F&usg=AOvVaw0xpENrPHv_R-" + - "ERkyacR2Bd"; - testLink(assert, link); - }, - - "Localhost link": assert => { - testLink(assert, "http://localhost"); - testLink(assert, "http://localhost:3000"); - }, - - "IPV4 link": assert => { - testLink(assert, "https://192.0.0.1"); - testLink(assert, "https://250.123.67.23:5924"); - }, - - "IPV6 link": assert => { - testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"); - testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:7000"); - }, - - "Missing scheme must not linkify": assert => { - testLink(assert, "matrix.org/foo/bar", true); - }, - - "Punctuation at end of link must not linkify": assert => { - const link = "https://foo.bar/?nenjil=lal810"; - const end = ".,? 
"; - for (const char of end) { - const out = [{ type: "link", url: link, text: link }, { type: "text", text: char }]; - test(assert, link + char, out); - } - }, - - "Unicode in hostname must not linkify": assert => { - const link = "https://foo.bar\uD83D\uDE03.com"; - const out = [{ type: "link", url: "https://foo.bar", text: "https://foo.bar" }, - { type: "text", text: "\uD83D\uDE03.com" }]; - test(assert, link, out); - }, - - "Link with unicode only after / must linkify": assert => { - testLink(assert, "https://foo.bar.com/\uD83D\uDE03"); } }; } diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify.js index 628d8c68..620f38c1 100644 --- a/src/domain/session/room/timeline/linkify.js +++ b/src/domain/session/room/timeline/linkify.js @@ -10,3 +10,98 @@ export function linkify(text, callback) { } callback(text.slice(curr), false); } + +export function tests() { + + class MockCallback { + mockCallback(text, isLink) { + if (!text.length) { + return; + } + if (!this.result) { + this.result = []; + } + const type = isLink ? "link" : "text"; + this.result.push({ type: type, text: text }); + } + } + + function test(assert, input, output) { + const m = new MockCallback; + linkify(input, m.mockCallback.bind(m)); + assert.deepEqual(output, m.result); + } + + function testLink(assert, link, expectFail = false) { + const input = link; + const output = expectFail ? 
[{ type: "text", text: input }] : + [{ type: "link", text: input }]; + test(assert, input, output); + } + + return { + "Link with host": assert => { + testLink(assert, "https://matrix.org"); + }, + + "Link with host & path": assert => { + testLink(assert, "https://matrix.org/docs/develop"); + }, + + "Link with host & fragment": assert => { + testLink(assert, "https://matrix.org#test"); + }, + + "Link with host & query": assert => { + testLink(assert, "https://matrix.org/?foo=bar"); + }, + + "Complex link": assert => { + const link = "https://www.foobar.com/url?sa=t&rct=j&q=&esrc=s&source" + + "=web&cd=&cad=rja&uact=8&ved=2ahUKEwjyu7DJ-LHwAhUQyzgGHc" + + "OKA70QFjAAegQIBBAD&url=https%3A%2F%2Fmatrix.org%2Fdocs%" + + "2Fprojects%2Fclient%2Felement%2F&usg=AOvVaw0xpENrPHv_R-" + + "ERkyacR2Bd"; + testLink(assert, link); + }, + + "Localhost link": assert => { + testLink(assert, "http://localhost"); + testLink(assert, "http://localhost:3000"); + }, + + "IPV4 link": assert => { + testLink(assert, "https://192.0.0.1"); + testLink(assert, "https://250.123.67.23:5924"); + }, + + "IPV6 link": assert => { + testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"); + testLink(assert, "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:7000"); + }, + + "Missing scheme must not linkify": assert => { + testLink(assert, "matrix.org/foo/bar", true); + }, + + "Punctuation at end of link must not linkify": assert => { + const link = "https://foo.bar/?nenjil=lal810"; + const end = ".,? 
"; + for (const char of end) { + const out = [{ type: "link", text: link }, { type: "text", text: char }]; + test(assert, link + char, out); + } + }, + + "Unicode in hostname must not linkify": assert => { + const link = "https://foo.bar\uD83D\uDE03.com"; + const out = [{ type: "link", text: "https://foo.bar" }, + { type: "text", text: "\uD83D\uDE03.com" }]; + test(assert, link, out); + }, + + "Link with unicode only after / must linkify": assert => { + testLink(assert, "https://foo.bar.com/\uD83D\uDE03"); + } + }; +} From 7be9f939090da7cec5463185da831ba506f8de76 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 20:32:36 +0530 Subject: [PATCH 22/35] Make regex module-level constant Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify.js index 620f38c1..6e877954 100644 --- a/src/domain/session/room/timeline/linkify.js +++ b/src/domain/session/room/timeline/linkify.js @@ -1,5 +1,6 @@ +const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui + export function linkify(text, callback) { - const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui const matches = text.matchAll(regex); let curr = 0; for (let match of matches) { From e20810b0cb34875ea354326a18882fd77561ba04 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 20:44:39 +0530 Subject: [PATCH 23/35] Add test for unicode after fragment without path Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify.js index 6e877954..4bcf98e8 100644 --- a/src/domain/session/room/timeline/linkify.js +++ b/src/domain/session/room/timeline/linkify.js @@ 
-103,6 +103,10 @@ export function tests() { "Link with unicode only after / must linkify": assert => { testLink(assert, "https://foo.bar.com/\uD83D\uDE03"); + }, + + "Link with unicode after fragment without path must linkify": assert => { + testLink(assert, "https://foo.bar.com#\uD83D\uDE03"); } }; } From 027f6020e76b1de94963082c58d50d9e2c6c71b2 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:15:36 +0530 Subject: [PATCH 24/35] Modified regex to allow unicode in fragment Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify.js index 4bcf98e8..bcc722a4 100644 --- a/src/domain/session/room/timeline/linkify.js +++ b/src/domain/session/room/timeline/linkify.js @@ -1,4 +1,4 @@ -const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:\/[^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui +const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:[\/#][^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui export function linkify(text, callback) { const matches = text.matchAll(regex); From 6f0362802a9692e6aac975b971d13852b85c2573 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:17:08 +0530 Subject: [PATCH 25/35] Move linkify.js to separate directory Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/MessageBodyBuilder.js | 2 +- src/domain/session/room/timeline/{ => linkify}/linkify.js | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/domain/session/room/timeline/{ => linkify}/linkify.js (100%) diff --git a/src/domain/session/room/timeline/MessageBodyBuilder.js b/src/domain/session/room/timeline/MessageBodyBuilder.js index edaa4c83..f1b34462 100644 --- a/src/domain/session/room/timeline/MessageBodyBuilder.js +++ b/src/domain/session/room/timeline/MessageBodyBuilder.js @@ -1,4 +1,4 @@ -import { linkify } from "./linkify.js"; +import { 
linkify } from "./linkify/linkify.js"; export class MessageBodyBuilder { diff --git a/src/domain/session/room/timeline/linkify.js b/src/domain/session/room/timeline/linkify/linkify.js similarity index 100% rename from src/domain/session/room/timeline/linkify.js rename to src/domain/session/room/timeline/linkify/linkify.js From 31740f4ec613e81005f6a95e800145d827d56353 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:18:52 +0530 Subject: [PATCH 26/35] Put regex into separate file Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/linkify.js | 2 +- src/domain/session/room/timeline/linkify/regex.js | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 src/domain/session/room/timeline/linkify/regex.js diff --git a/src/domain/session/room/timeline/linkify/linkify.js b/src/domain/session/room/timeline/linkify/linkify.js index bcc722a4..6ebf4e9f 100644 --- a/src/domain/session/room/timeline/linkify/linkify.js +++ b/src/domain/session/room/timeline/linkify/linkify.js @@ -1,4 +1,4 @@ -const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:[\/#][^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui +import { regex } from "./regex.js"; export function linkify(text, callback) { const matches = text.matchAll(regex); diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js new file mode 100644 index 00000000..fe8c94eb --- /dev/null +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -0,0 +1 @@ +export const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:[\/#][^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui From c6d7cef491d1202c3d535ca24d92e746bee50d99 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:21:37 +0530 Subject: [PATCH 27/35] Refactor regex to improve readability - Split regex into components - Add informative comments Signed-off-by: RMidhunSuresh --- .../session/room/timeline/linkify/regex.js | 30 ++++++++++++++++++- 
1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js index fe8c94eb..7530d301 100644 --- a/src/domain/session/room/timeline/linkify/regex.js +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -1 +1,29 @@ -export const regex = /(?:https|http|ftp):\/\/[a-zA-Z0-9:.\[\]#-]+(?:[\/#][^\s]*[^\s.,?!]|[^\s\u{80}-\u{10ffff}.,?!])/gui +const scheme = "(?:https|http|ftp):\\/\\/"; +const host = "[a-zA-Z0-9:.\\[\\]-]"; + +/* +A URL containing path (/) or fragment (#) component +is allowed to end with any character which is not +space nor punctuation. The ending character may be +non-ASCII. +*/ +const end = "[^\\s.,?!]"; +const additional = `[\\/#][^\\s]*${end}`; + +/* +Similarly, a URL not containing path or fragment must +also end with a character that is not space nor punctuation. +However the ending character must also be ASCII. +*/ +const nonASCII = "\\u{80}-\\u{10ffff}"; +const endASCII = `[^\\s${nonASCII}.,?!]`; + +/* +URL must not contain non-ascii characters in host but may contain +them in path or fragment components. 
+https://matrix.org/ - valid +https://matrix.org - invalid +*/ +const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`; + +export const regex = new RegExp(urlRegex, "gui"); From ef15968a3d52ec3f508e7302a8b58ed0bf785749 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:26:45 +0530 Subject: [PATCH 28/35] Add some helpful pointers Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/regex.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js index 7530d301..a67ce415 100644 --- a/src/domain/session/room/timeline/linkify/regex.js +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -19,10 +19,13 @@ const nonASCII = "\\u{80}-\\u{10ffff}"; const endASCII = `[^\\s${nonASCII}.,?!]`; /* -URL must not contain non-ascii characters in host but may contain -them in path or fragment components. -https://matrix.org/ - valid -https://matrix.org - invalid +Things to keep in mind: +1. URL must not contain non-ascii characters in host but may contain + them in path or fragment components. + https://matrix.org/ - valid + https://matrix.org - invalid + +2. Do not treat punctuation at the end as a part of the URL (.,?!) 
*/ const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`; From b521797f4d3e238656c18df7c5842be3d5bd6c78 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:28:14 +0530 Subject: [PATCH 29/35] Explain the reason for double escapes Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/regex.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js index a67ce415..b6e970cd 100644 --- a/src/domain/session/room/timeline/linkify/regex.js +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -1,3 +1,8 @@ +/* +The regex is split into component strings; +meaning that any escapes (\) must be also +be escaped. +*/ const scheme = "(?:https|http|ftp):\\/\\/"; const host = "[a-zA-Z0-9:.\\[\\]-]"; @@ -24,7 +29,6 @@ Things to keep in mind: them in path or fragment components. https://matrix.org/ - valid https://matrix.org - invalid - 2. Do not treat punctuation at the end as a part of the URL (.,?!) */ const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`; From 9d898bdf479098190b0551300c0733e9f811961c Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Tue, 11 May 2021 22:32:05 +0530 Subject: [PATCH 30/35] Change word to improve meaning Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/regex.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js index b6e970cd..ca6e4320 100644 --- a/src/domain/session/room/timeline/linkify/regex.js +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -18,7 +18,7 @@ const additional = `[\\/#][^\\s]*${end}`; /* Similarly, a URL not containing path or fragment must also end with a character that is not space nor punctuation. -However the ending character must also be ASCII. 
+Additionally, the ending character must also be ASCII. */ const nonASCII = "\\u{80}-\\u{10ffff}"; const endASCII = `[^\\s${nonASCII}.,?!]`; From 55b7c55efe5cf3bbf4b7889842ca304bc6064570 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Wed, 12 May 2021 15:52:05 +0530 Subject: [PATCH 31/35] Pull callback args into variables Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/linkify.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/domain/session/room/timeline/linkify/linkify.js b/src/domain/session/room/timeline/linkify/linkify.js index 6ebf4e9f..632918e6 100644 --- a/src/domain/session/room/timeline/linkify/linkify.js +++ b/src/domain/session/room/timeline/linkify/linkify.js @@ -4,12 +4,14 @@ export function linkify(text, callback) { const matches = text.matchAll(regex); let curr = 0; for (let match of matches) { - callback(text.slice(curr, match.index), false); + const precedingText = text.slice(curr, match.index); + callback(precedingText, false); callback(match[0], true); const len = match[0].length; curr = match.index + len; } - callback(text.slice(curr), false); + const remainingText = text.slice(curr); + callback(remainingText, false); } export function tests() { From 2e657fbbbd14739d46b25db25140d9563acc105e Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Wed, 12 May 2021 15:56:33 +0530 Subject: [PATCH 32/35] Add failing test for link ending with < Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/linkify.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/domain/session/room/timeline/linkify/linkify.js b/src/domain/session/room/timeline/linkify/linkify.js index 632918e6..f74dfa44 100644 --- a/src/domain/session/room/timeline/linkify/linkify.js +++ b/src/domain/session/room/timeline/linkify/linkify.js @@ -109,6 +109,12 @@ export function tests() { "Link with unicode after fragment without path must linkify": assert => { testLink(assert, 
"https://foo.bar.com#\uD83D\uDE03"); + }, + + "Link ends with <": assert => { + const link = "https://matrxi.org"; + const out = [{ type: "link", text: link }, { type: "text", text: "<" }]; + test(assert, link, out); } }; } From bb74656348973d50c5ea76857662496c32a58ce7 Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Wed, 12 May 2021 15:59:44 +0530 Subject: [PATCH 33/35] Grammar fix Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/regex.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js index ca6e4320..33adc176 100644 --- a/src/domain/session/room/timeline/linkify/regex.js +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -1,6 +1,6 @@ /* The regex is split into component strings; -meaning that any escapes (\) must be also +meaning that any escapes (\) must also be escaped. */ const scheme = "(?:https|http|ftp):\\/\\/"; From 00bcdbab37bbc0b3d9861e94a6239d3ccaf71cba Mon Sep 17 00:00:00 2001 From: RMidhunSuresh Date: Wed, 12 May 2021 16:15:07 +0530 Subject: [PATCH 34/35] Fix broken test Signed-off-by: RMidhunSuresh --- src/domain/session/room/timeline/linkify/linkify.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/domain/session/room/timeline/linkify/linkify.js b/src/domain/session/room/timeline/linkify/linkify.js index f74dfa44..8cea4b28 100644 --- a/src/domain/session/room/timeline/linkify/linkify.js +++ b/src/domain/session/room/timeline/linkify/linkify.js @@ -112,8 +112,8 @@ export function tests() { }, "Link ends with <": assert => { - const link = "https://matrxi.org"; - const out = [{ type: "link", text: link }, { type: "text", text: "<" }]; + const link = "https://matrix.org<"; + const out = [{ type: "link", text: "https://matrix.org" }, { type: "text", text: "<" }]; test(assert, link, out); } }; From 851e8d34a47c9e64f3d3e6f3e9a376c83bffd97b Mon Sep 17 00:00:00 2001 From: 
RMidhunSuresh Date: Wed, 12 May 2021 16:15:30 +0530 Subject: [PATCH 35/35] Incorporate better regex from review Signed-off-by: RMidhunSuresh --- .../session/room/timeline/linkify/regex.js | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/domain/session/room/timeline/linkify/regex.js b/src/domain/session/room/timeline/linkify/regex.js index 33adc176..2374ee23 100644 --- a/src/domain/session/room/timeline/linkify/regex.js +++ b/src/domain/session/room/timeline/linkify/regex.js @@ -4,24 +4,20 @@ meaning that any escapes (\) must also be escaped. */ const scheme = "(?:https|http|ftp):\\/\\/"; -const host = "[a-zA-Z0-9:.\\[\\]-]"; +const noSpaceNorPunctuation = "[^\\s.,?!]"; +const hostCharacter = "[a-zA-Z0-9:.\\[\\]-]"; /* -A URL containing path (/) or fragment (#) component -is allowed to end with any character which is not -space nor punctuation. The ending character may be -non-ASCII. +Using non-consuming group here to combine two criteria for the last character. +See point 1 below. */ -const end = "[^\\s.,?!]"; -const additional = `[\\/#][^\\s]*${end}`; +const host = `${hostCharacter}*(?=${hostCharacter})${noSpaceNorPunctuation}`; /* -Similarly, a URL not containing path or fragment must -also end with a character that is not space nor punctuation. -Additionally, the ending character must also be ASCII. +Use sub groups so we accept just / or #; but if anything comes after it, +it should not end with punctuation or space. */ -const nonASCII = "\\u{80}-\\u{10ffff}"; -const endASCII = `[^\\s${nonASCII}.,?!]`; +const pathOrFragment = `(?:[\\/#](?:[^\\s]*${noSpaceNorPunctuation})?)`; /* Things to keep in mind: @@ -30,7 +26,8 @@ Things to keep in mind: https://matrix.org/ - valid https://matrix.org - invalid 2. Do not treat punctuation at the end as a part of the URL (.,?!) +3. Path/fragment is optional. 
*/ -const urlRegex = `${scheme}${host}+(?:${additional}|${endASCII})`; +const urlRegex = `${scheme}${host}${pathOrFragment}?`; -export const regex = new RegExp(urlRegex, "gui"); +export const regex = new RegExp(urlRegex, "gi");