FIX: URLs containing two # would fail to work

FIX: URLs containing two # would fail to work

Some URLs in browsers are non compliant and contain twos # this commit adds special handling for this edge case by auto encoding any fragments containing #

From 671469bcc7a190d63bf4283a44ee6477cd39f180 Mon Sep 17 00:00:00 2001
From: Sam <sam.saffron@gmail.com>
Date: Tue, 11 Dec 2018 18:03:13 +1100
Subject: [PATCH] FIX: URLs containing two # would fail to work

Some URLs in browsers are non compliant and contain twos `#` this commit adds
special handling for this edge case by auto encoding any fragments containing `#`

diff --git a/app/models/topic_link.rb b/app/models/topic_link.rb
index 8876057..a7be7fa 100644
--- a/app/models/topic_link.rb
+++ b/app/models/topic_link.rb
@@ -117,11 +117,7 @@ SQL
     PrettyText
       .extract_links(post.cooked)
       .map do |u|
-        uri = begin
-          URI.parse(u.url)
-        rescue URI::Error
-        end
-
+        uri = UrlHelper.relaxed_parse(u.url)
         [u, uri]
       end
       .reject { |_, p| p.nil? || "mailto".freeze == p.scheme }
diff --git a/app/models/topic_link_click.rb b/app/models/topic_link_click.rb
index 7ae52e6..6caaff8 100644
--- a/app/models/topic_link_click.rb
+++ b/app/models/topic_link_click.rb
@@ -15,11 +15,7 @@ class TopicLinkClick < ActiveRecord::Base
     url = args[:url][0...TopicLink.max_url_length]
     return nil if url.blank?
 
-    uri = begin
-      URI.parse(url)
-    rescue URI::Error
-    end
-
+    uri = UrlHelper.relaxed_parse(url)
     urls = Set.new
     urls << url
     if url =~ /^http/
diff --git a/lib/url_helper.rb b/lib/url_helper.rb
index 377c7d3..976a63d 100644
--- a/lib/url_helper.rb
+++ b/lib/url_helper.rb
@@ -1,5 +1,20 @@
 class UrlHelper
 
+  # At the moment this handles invalid URLs that browser address bar accepts
+  # where second # is not encoded
+  #
+  # Longer term we can add support of simpleidn and encode unicode domains
+  def self.relaxed_parse(url)
+    url, fragment = url.split("#", 2)
+    uri = URI.parse(url)
+    if uri
+      fragment = URI.escape(fragment) if fragment&.include?('#')
+      uri.fragment = fragment
+      uri
+    end
+  rescue URI::Error
+  end
+
   def self.is_local(url)
     url.present? && (
       Discourse.store.has_been_uploaded?(url) ||
diff --git a/spec/components/url_helper_spec.rb b/spec/components/url_helper_spec.rb
index 2dd48c7..0ca6596 100644
--- a/spec/components/url_helper_spec.rb
+++ b/spec/components/url_helper_spec.rb
@@ -3,6 +3,15 @@ require_dependency 'url_helper'
 
 describe UrlHelper do
 
+  describe "#relaxed parse" do
+
+    it "can handle double #" do
+      url = UrlHelper.relaxed_parse("https://test.com#test#test")
+      expect(url.to_s).to eq("https://test.com#test%23test")
+    end
+
+  end
+
   describe "#is_local" do
 
     it "is true when the file has been uploaded" do
diff --git a/spec/fabricators/post_fabricator.rb b/spec/fabricators/post_fabricator.rb
index 6f1a9a9..b01f7e1 100644
--- a/spec/fabricators/post_fabricator.rb
+++ b/spec/fabricators/post_fabricator.rb
@@ -95,27 +95,28 @@ Fabricator(:post_with_uploads, from: :post) do
 end
 
 Fabricator(:post_with_uploads_and_links, from: :post) do
-  raw '
-<a href="/uploads/default/original/2X/2345678901234567.jpg">Link</a>
-<img src="/uploads/default/original/1X/1234567890123456.jpg">
-<a href="http://www.google.com">Google</a>
-<img src="http://foo.bar/image.png">
-<a class="attachment" href="//discourse-cloud-file-uploads.s3.dualstack.us-west-2.amazonaws.com/review/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)
-:smile:
-'
+  raw <<~RAW
+    <a href="/uploads/default/original/2X/2345678901234567.jpg">Link</a>
+    <img src="/uploads/default/original/1X/1234567890123456.jpg">
+    <a href="http://www.google.com">Google</a>
+    <img src="http://foo.bar/image.png">
+    <a class="attachment" href="//discourse-cloud-file-uploads.s3.dualstack.us-west-2.amazonaws.com/review/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)
+    :smile:
+  RAW
 end
 
 Fabricator(:post_with_external_links, from: :post) do
   user
   topic
-  raw "
-Here's a link to twitter: http://twitter.com
-And a link to google: http://google.com
-And a secure link to google: https://google.com
-And a markdown link: [forumwarz](http://forumwarz.com)
-And a markdown link with a period after it [codinghorror](http://www.codinghorror.com/blog).
-And one with a hash http://discourse.org#faq
-  "
+  raw <<~RAW
+    Here's a link to twitter: http://twitter.com
+    And a link to google: http://google.com
+    And a secure link to google: https://google.com
+    And a markdown link: [forumwarz](http://forumwarz.com)
+    And a markdown link with a period after it [codinghorror](http://www.codinghorror.com/blog).
+    And one with a hash http://discourse.org#faq
+    And one with a two hash http://discourse.org#a#b
+  RAW
 end
 
 Fabricator(:private_message_post, from: :post) do
diff --git a/spec/models/topic_link_click_spec.rb b/spec/models/topic_link_click_spec.rb
index 5b440f7..20da8c3 100644
--- a/spec/models/topic_link_click_spec.rb
+++ b/spec/models/topic_link_click_spec.rb
@@ -54,6 +54,10 @@ describe TopicLinkClick do
           TopicLinkClick.create_from(url: @topic_link.url, post_id: @post.id, ip: '127.0.0.0', user_id: @post.user_id)
         }.not_to change(TopicLinkClick, :count)
 
+        # can handle double # in a url
+        # NOTE: this is not compliant but exists in the wild
+        click = TopicLinkClick.create_from(url: "http://discourse.org#a#b", post_id: @post.id, ip: '127.0.0.1')
+        expect(click).to eq("http://discourse.org#a#b")
       end
 
       context 'with a valid url and post_id' do

GitHub

1 Like