From 59767124ed7768c82621c414549d24d33096b16e Mon Sep 17 00:00:00 2001 From: Juhani Krekelä Date: Thu, 8 Jul 2021 09:14:41 +0300 Subject: Improve URL matching Currently the weltschmerz URL regex does not match URLs with quotes or parentheses, considering the URL to end when one is encountered. On the other hand, if a URL is surrounded by angle brackets it includes the closing '>' in the URL match. Additionally, the regex allows a URL to contain a space if and only if it is the second character of the host component of the URL. Most of this appears to be down to bugs in the regex as it is currently written. This rewrites the regex to be cleaner and easier to read, while maintaining the intended logic of the original. --- terminal.vala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terminal.vala b/terminal.vala index 86f086f..b2a610f 100644 --- a/terminal.vala +++ b/terminal.vala @@ -1,6 +1,6 @@ [GtkTemplate (ui = "/weltschmerz/ui/terminal.ui")] class Terminal : Gtk.Overlay { - const string URL_REGEX = """(?>https?|ftp):\/\/[^\s\$.?#].(?>[^\s()"]*|\([^\s]*\)|"[^\s"]*")"""; + const string URL_REGEX = """(?>https?|ftp):\/\/[^[:punct:][:space:]](?>[^][)(><"“”[:space:]]+|\([^)([:space:]]*\)|"[^"[:space:]]*")+"""; const uint PCRE2_CASELESS = 0x00000008u; const uint PCRE2_MULTILINE = 0x00000400u; const uint PCRE2_NO_UTF_CHECK = 0x00080000u; -- cgit v1.2.3-2-gb3c3