FIX: Canonical URLs may be relative (#14825)

FIX: Canonical URLs may be relative (#14825)

FinalDestination’s follow_canonical mode used for embedded topics should work when canonical URLs are relative, as specified in RFC 6596

diff --git a/lib/final_destination.rb b/lib/final_destination.rb
index 6ab994f..09f4d0c 100644
--- a/lib/final_destination.rb
+++ b/lib/final_destination.rb
@@ -225,7 +225,7 @@ class FinalDestination
       end
 
       if @follow_canonical
-        next_url = uri(fetch_canonical_url(response.body))
+        next_url = fetch_canonical_url(response.body)
 
         if next_url.to_s.present? && next_url != @uri
           @follow_canonical = false
@@ -481,10 +481,17 @@ class FinalDestination
 
   def fetch_canonical_url(body)
     return if body.blank?
-    canonical_link = Nokogiri::HTML5(body).at("link[rel='canonical']")
 
-    return if canonical_link.nil?
+    canonical_element = Nokogiri::HTML5(body).at("link[rel='canonical']")
+    return if canonical_element.nil?
+    canonical_uri = uri(canonical_element['href'])
+    return if canonical_uri.blank?
 
-    canonical_link['href']
+    return canonical_uri if canonical_uri.host.present?
+    parts = [@uri.host, canonical_uri.to_s]
+    complete_url = canonical_uri.to_s.starts_with?('/') ? parts.join('') : parts.join('/')
+    complete_url = "#{@uri.scheme}://#{complete_url}" if @uri.scheme
+
+    uri(complete_url)
   end
 end
diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb
index 8b78132..e9548ea 100644
--- a/spec/components/final_destination_spec.rb
+++ b/spec/components/final_destination_spec.rb
@@ -194,6 +194,31 @@ describe FinalDestination do
         expect(final.status).to eq(:resolved)
       end
 
+      it 'resolves the canonical link when the URL is relative' do
+        host = "https://codinghorror.com"
+
+        canonical_follow("#{host}/blog", "/blog/canonical")
+        stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
+
+        final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
+
+        expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
+        expect(final.redirected?).to eq(false)
+        expect(final.status).to eq(:resolved)
+      end
+
+      it 'resolves the canonical link when the URL is relative and does not start with the / symbol' do
+        host = "https://codinghorror.com"
+        canonical_follow("#{host}/blog", "blog/canonical")
+        stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
+
+        final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
+
+        expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
+        expect(final.redirected?).to eq(false)
+        expect(final.status).to eq(:resolved)
+      end
+
       it "does not follow the canonical link if it's the same as the current URL" do
         canonical_follow("https://eviltrout.com", "https://eviltrout.com")
 

GitHub sha: 53abcd825d2abbd2065796c974314f0952b26041

This commit appears in #14825 which was approved by eviltrout. It was merged by romanrizzi.