From f16376c9fa7b51c71bc7752e85e3d001adc244e2 Mon Sep 17 00:00:00 2001
From: Juhani Krekelä
Date: Mon, 26 Jul 2021 19:52:13 +0300
Subject: Don't consider punctuation as part of the URL

We currently consider periods, commas, colons, semicolons, exclamation
points and question marks following a URL as part of it. While ending a
URL with these characters is technically speaking valid, more commonly
they are a result of people ending a clause with a URL.

Let's ignore such characters if they are present at the very end of a
URL. Make sure to still match them, however, if any non-punctuation
characters follow. This way, almost all actual usage of punctuation
within URLs is unaffected.
---
 terminal.vala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/terminal.vala b/terminal.vala
index c392d7f..47c33bc 100644
--- a/terminal.vala
+++ b/terminal.vala
@@ -1,6 +1,6 @@
 [GtkTemplate (ui = "/weltschmerz/ui/terminal.ui")]
 class Terminal : Gtk.Overlay {
-	const string URL_REGEX = """(?>https?|ftp|gopher):\/\/[^[:punct:][:space:]](?>[^][)(><"“”[:space:]]+|\([^)([:space:]]*\)|"[^"[:space:]]*")+""";
+	const string URL_REGEX = """(?>https?|ftp|gopher):\/\/[^[:punct:][:space:]](?>(?>[.,!?:;]*)(?>[^][)(><"“”.,!?:;[:space:]]+|\([^)([:space:]]*\)|"[^"[:space:]]*"))+""";
 	const uint PCRE2_CASELESS = 0x00000008u;
 	const uint PCRE2_MULTILINE = 0x00000400u;
 	const uint PCRE2_NO_UTF_CHECK = 0x00080000u;
-- 
cgit v1.2.3-2-gb3c3