FEATURE: default canonical URL (#9738)

FEATURE: default canonical URL (#9738)

For pages that do not specify canonical URL we will default to https://SITENAME/PATH.

This ensures that if a URL is crawled on the CDN the search ranking will transfer to the main site.

Additionally we whitelist the ?page param

diff --git a/lib/canonical_url.rb b/lib/canonical_url.rb
index b46f006..d318bd5 100644
--- a/lib/canonical_url.rb
+++ b/lib/canonical_url.rb
@@ -13,9 +13,18 @@ module CanonicalURL
   end
 
   module Helpers
+    ALLOWED_CANONICAL_PARAMS = %w(page)
     def canonical_link_tag(url = nil)
-      return '' unless url || @canonical_url
-      tag('link', rel: 'canonical', href: url || @canonical_url || request.url)
+      tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical)
+    end
+
+    def default_canonical
+      canonical = +"#{Discourse.base_url_no_prefix}#{request.path}"
+      allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) }
+      if allowed_params.present?
+        canonical << "?#{allowed_params.keys.zip(allowed_params.values).map { |key, value| "#{key}=#{value}" }.join("&")}"
+      end
+      canonical
     end
   end
 end
diff --git a/spec/requests/application_controller_spec.rb b/spec/requests/application_controller_spec.rb
index 4a96174..8e4dc43 100644
--- a/spec/requests/application_controller_spec.rb
+++ b/spec/requests/application_controller_spec.rb
@@ -605,4 +605,18 @@ RSpec.describe ApplicationController do
     expect(response.status).to eq(200)
     expect(response.body).to include('Discourse')
   end
+
+  it 'has canonical tag' do
+    get '/', headers: { HTTP_ACCEPT: '*/*' }
+    expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/" })
+    get '/?query_param=true', headers: { HTTP_ACCEPT: '*/*' }
+    expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/" })
+    get '/latest?page=2&additional_param=true', headers: { HTTP_ACCEPT: '*/*' }
+    expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/latest?page=2" })
+    get '/404', headers: { HTTP_ACCEPT: '*/*' }
+    expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/404" })
+    topic = create_post.topic
+    get "/t/#{topic.slug}/#{topic.id}"
+    expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/t/#{topic.slug}/#{topic.id}" })
+  end
 end

GitHub sha: 4c8bece1

This commit appears in #9738 which was approved by eviltrout. It was merged by SamSaffron.