SECURITY: Onebox canonical links bypassing FinalDestination checks (#13605)

SECURITY: Onebox canonical links bypassing FinalDestination checks (#13605)

diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb
index 10dd9fa..09383d5 100644
--- a/lib/onebox/helpers.rb
+++ b/lib/onebox/helpers.rb
@@ -36,9 +36,12 @@ module Onebox
         # prefer canonical link
         canonical_link = doc.at('//link[@rel="canonical"]/@href')
         canonical_uri = Addressable::URI.parse(canonical_link)
-        if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" && canonical_uri.host != "localhost"
-          response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
-          doc = Nokogiri::HTML(response) if response
+        if canonical_link && canonical_uri && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
+          uri = FinalDestination.new(canonical_link, Oneboxer.get_final_destination_options(canonical_link)).resolve
+          if uri.present?
+            response = (fetch_response(uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
+            doc = Nokogiri::HTML(response) if response
+          end
         end
       end
 
diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb
index a1d1817..845fc2c 100644
--- a/lib/oneboxer.rb
+++ b/lib/oneboxer.rb
@@ -397,31 +397,11 @@ module Oneboxer
 
   def self.external_onebox(url, available_strategies = nil)
     Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
-
       uri = URI(url)
       available_strategies ||= Oneboxer.ordered_strategies(uri.hostname)
       strategy = available_strategies.shift
 
-      fd_options = {
-        ignore_redirects: ignore_redirects,
-        ignore_hostnames: blocked_domains,
-        force_get_hosts: force_get_hosts,
-        force_custom_user_agent_hosts: force_custom_user_agent_hosts,
-        preserve_fragment_url_hosts: preserve_fragment_url_hosts,
-        timeout: 5
-      }
-
-      if strategy && Oneboxer.strategies[strategy][:force_get_host]
-        fd_options[:force_get_hosts] = ["https://#{uri.hostname}"]
-      end
-      if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host]
-        fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"]
-      end
-
-      user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
-      fd_options[:default_user_agent] = user_agent_override if user_agent_override
-
-      fd = FinalDestination.new(url, fd_options)
+      fd = FinalDestination.new(url, get_final_destination_options(url, strategy))
       uri = fd.resolve
 
       if fd.status != :resolved
@@ -453,6 +433,8 @@ module Oneboxer
       }
 
       onebox_options[:cookie] = fd.cookie if fd.cookie
+
+      user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
       onebox_options[:user_agent] = user_agent_override if user_agent_override
 
       r = Onebox.preview(uri.to_s, onebox_options)
@@ -552,4 +534,32 @@ module Oneboxer
     "ONEBOXER_STRATEGY_#{hostname}"
   end
 
+  def self.get_final_destination_options(url, strategy = nil)
+    fd_options = {
+      ignore_redirects: ignore_redirects,
+      ignore_hostnames: blocked_domains,
+      force_get_hosts: force_get_hosts,
+      force_custom_user_agent_hosts: force_custom_user_agent_hosts,
+      preserve_fragment_url_hosts: preserve_fragment_url_hosts,
+      timeout: 5
+    }
+
+    uri = URI(url)
+
+    if strategy.blank?
+      strategy = Oneboxer.ordered_strategies(uri.hostname).shift
+    end
+
+    if strategy && Oneboxer.strategies[strategy][:force_get_host]
+      fd_options[:force_get_hosts] = ["https://#{uri.hostname}"]
+    end
+    if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host]
+      fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"]
+    end
+
+    user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
+    fd_options[:default_user_agent] = user_agent_override if user_agent_override
+
+    fd_options
+  end
 end
diff --git a/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb b/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb
index 46991aa..14cadc1 100644
--- a/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb
+++ b/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb
@@ -98,6 +98,7 @@ describe Onebox::Engine::AllowlistedGenericOnebox do
       before do
         stub_request(:get, mobile_url).to_return(status: 200, body: onebox_response('etsy_mobile'))
         stub_request(:get, canonical_url).to_return(status: 200, body: onebox_response('etsy'))
+        stub_request(:head, canonical_url).to_return(status: 200, body: "")
       end
 
       it 'fetches opengraph data and price from canonical link' do
@@ -142,6 +143,7 @@ describe Onebox::Engine::AllowlistedGenericOnebox do
         }
       )
       stub_request(:get, redirect_link).to_return(status: 200, body: onebox_response('dailymail'))
+      stub_request(:head, redirect_link).to_return(status: 200, body: "")
     end
 
     around do |example|
@@ -168,9 +170,10 @@ describe Onebox::Engine::AllowlistedGenericOnebox do
       before do
         stub_request(:get, "https://edition.cnn.com/2020/05/15/health/gallery/coronavirus-people-adopting-pets-photos/index.html")
           .to_return(status: 200, body: onebox_response('cnn'))
-
         stub_request(:get, "https://www.cnn.com/2020/05/15/health/gallery/coronavirus-people-adopting-pets-photos/index.html")
           .to_return(status: 200, body: onebox_response('cnn'))
+        stub_request(:head, "https://www.cnn.com/2020/05/15/health/gallery/coronavirus-people-adopting-pets-photos/index.html")
+          .to_return(status: 200, body: "")
       end
 
       it 'shows basic onebox' do
diff --git a/spec/lib/onebox/engine/google_photos_onebox_spec.rb b/spec/lib/onebox/engine/google_photos_onebox_spec.rb
index ba4888b..ffb1327 100644
--- a/spec/lib/onebox/engine/google_photos_onebox_spec.rb
+++ b/spec/lib/onebox/engine/google_photos_onebox_spec.rb
@@ -10,6 +10,8 @@ describe Onebox::Engine::GooglePhotosOnebox do
     stub_request(:get, link).to_return(status: 200, body: onebox_response("googlephotos"))
     stub_request(:get, "https://photos.google.com/share/AF1QipOV3gcu_edA8lyjJEpS9sC1g3AeCUtaZox11ylYZId7wJ7cthZ8M1kZXeAp5vhEPg?key=QktmUFNvdWpNVktERU5zWmVRZlZubzRRc0ttWWN3")
       .to_return(status: 200, body: onebox_response("googlephotos"))
+    stub_request(:head, "https://photos.google.com/share/AF1QipOV3gcu_edA8lyjJEpS9sC1g3AeCUtaZox11ylYZId7wJ7cthZ8M1kZXeAp5vhEPg?key=QktmUFNvdWpNVktERU5zWmVRZlZubzRRc0ttWWN3")
+      .to_return(status: 200, body: "")
   end
 
   it "includes album title" do
diff --git a/spec/lib/onebox/engine/twitter_status_onebox_spec.rb b/spec/lib/onebox/engine/twitter_status_onebox_spec.rb
index 9b0085e..daa6f36 100644
--- a/spec/lib/onebox/engine/twitter_status_onebox_spec.rb
+++ b/spec/lib/onebox/engine/twitter_status_onebox_spec.rb
@@ -59,7 +59,7 @@ describe Onebox::Engine::TwitterStatusOnebox do
 
   shared_context "quoted tweet info" do
     before do
-      @link = "https://twitter.com/Metallica/status/1128068672289890305"
+      @link = "https://twitter.com/metallica/status/1128068672289890305"
       @onebox_fixture = "twitterstatus_quoted"
 
       stub_request(:get, @link.downcase).to_return(status: 200, body: onebox_response(@onebox_fixture))
diff --git a/spec/lib/onebox/helpers_spec.rb b/spec/lib/onebox/helpers_spec.rb
index f24719f..42543ca 100644
--- a/spec/lib/onebox/helpers_spec.rb
+++ b/spec/lib/onebox/helpers_spec.rb
@@ -58,6 +58,7 @@ RSpec.describe Onebox::Helpers do
         uri = 'https://www.example.com'
         stub_request(:get, uri).to_return(status: 200, body: "<!DOCTYPE html><link rel='canonical' href='http://foobar.com/'/><p>invalid</p>")

[... diff too long, it was truncated ...]

GitHub sha: 05bdbd9f97b1cc039ffe3437f1e7ac17d9a4716f

This commit appears in #13605 which was approved by gschlager. It was merged by techAPJ.