FIX: encode the URL per RFC 3986 spec (#404)

FIX: encode the URL per RFC 3986 spec (#404)

Inspired from: (PUP-1890) Add Puppet::Util.uri_encode · puppetlabs/puppet@49bfae4 · GitHub

diff --git a/lib/onebox/engine.rb b/lib/onebox/engine.rb
index c48077d..6b5a25f 100644
--- a/lib/onebox/engine.rb
+++ b/lib/onebox/engine.rb
@@ -81,12 +81,7 @@ module Onebox
     end
 
     def link
-      @url.gsub(/['\"<>]/,
-        "'" => '&#39;',
-        '"' => '&quot;',
-        '<' => '&lt;',
-        '>' => '&gt;',
-      )
+      ::Onebox::Helpers.uri_encode(@url)
     end
 
     def always_https?
diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb
index cd7fbc7..0835fc2 100644
--- a/lib/onebox/helpers.rb
+++ b/lib/onebox/helpers.rb
@@ -185,5 +185,52 @@ module Onebox
       end
       src
     end
+
+    RFC_3986_URI_REGEX = /^(?<scheme>([^:\/?#]+):)?(?<authority>\/\/([^\/?#]*))?(?<path>[^?#]*)(\?(?<query>[^#]*))?(#(?<fragment>.*))?$/
+
+    # Percent-encodes a URI query parameter per RFC3986 - https://tools.ietf.org/html/rfc3986
+    def self.uri_query_encode(query_string)
+      return "" unless query_string
+
+      # query can encode space to %20 OR +
+      # + MUST be encoded as %2B
+      # in RFC3968 both query and fragment are defined as:
+      # = *( pchar / "/" / "?" )
+      # CGI.escape turns space into + which is the most backward compatible
+      # however it doesn't roundtrip through URI.unescape which prefers %20
+      CGI.escape(query_string).gsub('+', '%20')
+    end
+
+    # Percent-encodes a URI string per RFC3986 - https://tools.ietf.org/html/rfc3986
+    def self.uri_encode(url)
+      return "" unless url
+
+      # parse uri into named matches, then reassemble properly encoded
+      parts = url.match(RFC_3986_URI_REGEX)
+
+      encoded = ""
+      encoded += parts[:scheme] unless parts[:scheme].nil?
+      encoded += parts[:authority] unless parts[:authority].nil?
+
+      # path requires space to be encoded as %20 (NEVER +)
+      # + should be left unencoded
+      # URI::parse and URI::Generic.build don't like paths encoded with CGI.escape
+      # URI.escape does not change / to %2F and : to %3A like CGI.escape
+      encoded += URI.escape(parts[:path]) unless parts[:path].nil?
+
+      # each query parameter
+      if !parts[:query].nil?
+        query_string = parts[:query].split('&').map do |pair|
+          # can optionally be separated by an =
+          pair.split('=').map do |v|
+            uri_query_encode(v)
+          end.join('=')
+        end.join('&')
+        encoded += '?' + query_string
+      end
+
+      encoded += '#' + uri_query_encode(parts[:fragment]) unless parts[:fragment].nil?
+      encoded
+    end
   end
 end
diff --git a/spec/lib/onebox/engine_spec.rb b/spec/lib/onebox/engine_spec.rb
index fba8894..522c0f5 100644
--- a/spec/lib/onebox/engine_spec.rb
+++ b/spec/lib/onebox/engine_spec.rb
@@ -21,7 +21,7 @@ describe Onebox::Engine do
     before { allow(Onebox::View).to receive(:template) { %|this should be a template| } }
 
     it "escapes `link`" do
-      html = OneboxEngineExample.new(%|http://foo.com/'?a=1&b=2|).to_html
+      html = OneboxEngineExample.new(%|http://foo.com/bar?a='&b=2|).to_html
       expect(html).not_to match(/'/)
     end
   end
diff --git a/spec/lib/onebox/helpers_spec.rb b/spec/lib/onebox/helpers_spec.rb
index 56d8d1f..5fd0cb8 100644
--- a/spec/lib/onebox/helpers_spec.rb
+++ b/spec/lib/onebox/helpers_spec.rb
@@ -70,4 +70,14 @@ RSpec.describe Onebox::Helpers do
     end
   end
 
+  describe '.uri_encode' do
+    it { expect(described_class.uri_encode('http://example.com/f"o&o?[b"ar]')).to eq("http://example.com/f%22o&o?%5Bb%22ar%5D") }
+    it { expect(described_class.uri_encode("http://example.com/f.o~o;?<ba'r>")).to eq("http://example.com/f.o~o;?%3Cba%27r%3E") }
+    it { expect(described_class.uri_encode("http://example.com/<pa'th>(foo)?b+a+r")).to eq("http://example.com/%3Cpa'th%3E(foo)?b%2Ba%2Br") }
+    it { expect(described_class.uri_encode("http://example.com/p,a:t!h-f$o@o*?b!a#r@")).to eq("http://example.com/p,a:t!h-f$o@o*?b%21a#r%40") }
+    it { expect(described_class.uri_encode("http://example.com/path&foo?b'a<r>&qu(er)y=1")).to eq("http://example.com/path&foo?b%27a%3Cr%3E&qu%28er%29y=1") }
+    it { expect(described_class.uri_encode("http://example.com/index&<script>alert('XSS');</script>")).to eq("http://example.com/index&%3Cscript%3Ealert('XSS');%3C/script%3E") }
+    it { expect(described_class.uri_encode("http://example.com/index.html?message=<script>alert('XSS');</script>")).to eq("http://example.com/index.html?message=%3Cscript%3Ealert%28%27XSS%27%29%3B%3C%2Fscript%3E") }
+    it { expect(described_class.uri_encode("http://example.com/index.php/<IFRAME SRC=source.com onload='alert(document.cookie)'></IFRAME>")).to eq("http://example.com/index.php/%3CIFRAME%20SRC=source.com%20onload='alert(document.cookie)'%3E%3C/IFRAME%3E") }
+  end
 end

GitHub sha: 7a1885c4

2 Likes

This commit has been mentioned on Discourse Meta. There might be relevant details there:

This commit has been mentioned on Discourse Meta. There might be relevant details there: