FIX: ignore canonical link to localhost (#13577)

FIX: ignore canonical link to localhost (#13577)

diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb
index 6b8f4d3..10dd9fa 100644
--- a/lib/onebox/helpers.rb
+++ b/lib/onebox/helpers.rb
@@ -36,7 +36,7 @@ module Onebox
         # prefer canonical link
         canonical_link = doc.at('//link[@rel="canonical"]/@href')
         canonical_uri = Addressable::URI.parse(canonical_link)
-        if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
+        if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" && canonical_uri.host != "localhost"
           response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
           doc = Nokogiri::HTML(response) if response
         end
diff --git a/spec/lib/onebox/helpers_spec.rb b/spec/lib/onebox/helpers_spec.rb
index 6532905..f24719f 100644
--- a/spec/lib/onebox/helpers_spec.rb
+++ b/spec/lib/onebox/helpers_spec.rb
@@ -52,6 +52,23 @@ RSpec.describe Onebox::Helpers do
 
       expect(described_class.fetch_html_doc(uri).to_s).to match("success")
     end
+
+    context "canonical link" do
+      it "follows canonical link" do
+        uri = 'https://www.example.com'
+        stub_request(:get, uri).to_return(status: 200, body: "<!DOCTYPE html><link rel='canonical' href='http://foobar.com/'/><p>invalid</p>")
+        stub_request(:get, 'http://foobar.com').to_return(status: 200, body: "<!DOCTYPE html><p>success</p>")
+
+        expect(described_class.fetch_html_doc(uri).to_s).to match("success")
+      end
+
+      it "does not follow canonical link pointing at localhost" do
+        uri = 'https://www.example.com'
+        stub_request(:get, uri).to_return(status: 200, body: "<!DOCTYPE html><link rel='canonical' href='http://localhost:3000/'/><p>success</p>")
+
+        expect(described_class.fetch_html_doc(uri).to_s).to match("success")
+      end
+    end
   end
 
   describe "redirects" do

GitHub sha: b63c9febe80f9b44087bb4acdc4047f60223d01c

This commit appears in #13577, which was approved by CvX. It was merged by techAPJ.