FIX: Make category slugs lowercase (#11277)

FIX: Make category slugs lowercase (#11277)

Admins could specify category slug with upper case characters and same slug, but with different cases could be used simultaneously.

diff --git a/app/assets/javascripts/discourse/app/lib/link-hashtags.js b/app/assets/javascripts/discourse/app/lib/link-hashtags.js
index d09cbbd..c6c633d 100644
--- a/app/assets/javascripts/discourse/app/lib/link-hashtags.js
+++ b/app/assets/javascripts/discourse/app/lib/link-hashtags.js
@@ -23,15 +23,19 @@ export function linkSeenHashtags($elem) {
         slug = slug.substr(0, slug.length - TAG_HASHTAG_POSTFIX.length);
       }
 
-      if (categoryHashtags[slug] && !hasTagSuffix) {
-        replaceSpan($(hashtag), slug, categoryHashtags[slug]);
-      } else if (tagHashtags[slug]) {
-        replaceSpan($(hashtag), slug, tagHashtags[slug]);
+      const lowerSlug = slug.toLowerCase();
+      if (categoryHashtags[lowerSlug] && !hasTagSuffix) {
+        replaceSpan($(hashtag), slug, categoryHashtags[lowerSlug]);
+      } else if (tagHashtags[lowerSlug]) {
+        replaceSpan($(hashtag), slug, tagHashtags[lowerSlug]);
       }
     });
   });
 
-  return slugs.uniq().filter((slug) => !checkedHashtags.has(slug));
+  return slugs
+    .map((slug) => slug.toLowerCase())
+    .uniq()
+    .filter((slug) => !checkedHashtags.has(slug));
 }
 
 export function fetchUnseenHashtags(slugs) {
diff --git a/app/assets/javascripts/discourse/tests/acceptance/hashtags-test.js b/app/assets/javascripts/discourse/tests/acceptance/hashtags-test.js
index 96db923..fd71dbb 100644
--- a/app/assets/javascripts/discourse/tests/acceptance/hashtags-test.js
+++ b/app/assets/javascripts/discourse/tests/acceptance/hashtags-test.js
@@ -27,14 +27,17 @@ acceptance("Category and Tag Hashtags", function (needs) {
 
 this is a tag hashtag #monkey
 
-category vs tag: #bug vs #bug::tag`
+category vs tag: #bug vs #bug::tag
+
+uppercase hashtag works too #BUG, #BUG::tag`
     );
 
     assert.equal(
       queryAll(".d-editor-preview:visible").html().trim(),
       `<p>this is a category hashtag <a href="/c/bugs" class="hashtag">#<span>bug</span></a></p>
 <p>this is a tag hashtag <a href="/tag/monkey" class="hashtag">#<span>monkey</span></a></p>
-<p>category vs tag: <a href="/c/bugs" class="hashtag">#<span>bug</span></a> vs <a href="/tag/bug" class="hashtag">#<span>bug</span></a></p>`
+<p>category vs tag: <a href="/c/bugs" class="hashtag">#<span>bug</span></a> vs <a href="/tag/bug" class="hashtag">#<span>bug</span></a></p>
+<p>uppercase hashtag works too <a href="/c/bugs" class="hashtag">#<span>BUG</span></a>, <a href="/tag/bug" class="hashtag">#<span>BUG</span></a></p>`
     );
   });
 });
diff --git a/app/models/category.rb b/app/models/category.rb
index 09cf67d..7085aa3 100644
--- a/app/models/category.rb
+++ b/app/models/category.rb
@@ -343,8 +343,7 @@ class Category < ActiveRecord::Base
     if slug.present?
       # if we don't unescape it first we strip the % from the encoded version
       slug = SiteSetting.slug_generation_method == 'encoded' ? CGI.unescape(self.slug) : self.slug
-      # sanitize the custom slug
-      self.slug = Slug.sanitize(slug)
+      self.slug = Slug.for(slug, '', method: :encoded)
 
       if self.slug.blank?
         errors.add(:slug, :invalid)
@@ -795,8 +794,12 @@ class Category < ActiveRecord::Base
     return nil if slug_path.empty?
     return nil if slug_path.size > SiteSetting.max_category_nesting
 
-    if SiteSetting.slug_generation_method == "encoded"
-      slug_path.map! { |slug| CGI.escape(slug) }
+    slug_path.map! do |slug|
+      if SiteSetting.slug_generation_method == "encoded"
+        CGI.escape(slug.downcase)
+      else
+        slug.downcase
+      end
     end
 
     query =
diff --git a/db/migrate/20201117212328_set_category_slug_to_lower.rb b/db/migrate/20201117212328_set_category_slug_to_lower.rb
new file mode 100644
index 0000000..8d7d275
--- /dev/null
+++ b/db/migrate/20201117212328_set_category_slug_to_lower.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+class SetCategorySlugToLower < ActiveRecord::Migration[6.0]
+  def up
+    remove_index(:categories, name: 'unique_index_categories_on_slug')
+
+    categories = DB.query("SELECT id, name, slug, parent_category_id FROM categories")
+    old_slugs = categories.map { |c| [c.id, c.slug] }.to_h
+    updates = {}
+
+    # Resolve duplicate tags by replacing mixed case slugs with new ones
+    # extracted from category names
+    slugs = categories
+      .filter { |category| category.slug.present? }
+      .group_by { |category| [category.parent_category_id, category.slug.downcase] }
+      .map { |slug, cats| [slug, cats.size] }
+      .to_h
+
+    categories.each do |category|
+      old_parent_and_slug = [category.parent_category_id, category.slug.downcase]
+      next if category.slug.blank? ||
+              category.slug == category.slug.downcase ||
+              slugs[old_parent_and_slug] <= 1
+
+      new_slug = category.name.parameterize.tr("_", "-").squeeze('-').gsub(/\A-+|-+\z/, '')[0..255]
+      new_slug = '' if (new_slug =~ /[^\d]/).blank?
+      new_parent_and_slug = [category.parent_category_id, new_slug]
+      next if new_slug.blank? ||
+              (slugs[new_parent_and_slug].present? && slugs[new_parent_and_slug] > 0)
+
+      updates[category.id] = category.slug = new_slug
+      slugs[old_parent_and_slug] -= 1
+      slugs[new_parent_and_slug] = 1
+    end
+
+    # Reset left conflicting slugs
+    slugs = categories
+      .filter { |category| category.slug.present? }
+      .group_by { |category| [category.parent_category_id, category.slug.downcase] }
+      .map { |slug, cats| [slug, cats.size] }
+      .to_h
+
+    categories.each do |category|
+      old_parent_and_slug = [category.parent_category_id, category.slug.downcase]
+      next if category.slug.blank? ||
+              category.slug == category.slug.downcase ||
+              slugs[old_parent_and_slug] <= 1
+
+      updates[category.id] = category.slug = ''
+      slugs[old_parent_and_slug] -= 1
+    end
+
+    # Update all category slugs
+    updates.each do |id, slug|
+      execute <<~SQL
+        UPDATE categories
+        SET slug = '#{PG::Connection.escape_string(slug)}'
+        WHERE id = #{id} -- #{PG::Connection.escape_string(old_slugs[id])}
+      SQL
+    end
+
+    # Ensure all slugs are lowercase
+    execute "UPDATE categories SET slug = LOWER(slug)"
+
+    add_index(
+      :categories,
+      'COALESCE(parent_category_id, -1), LOWER(slug)',
+      name: 'unique_index_categories_on_slug',
+      where: "slug != ''",
+      unique: true
+    )
+  end
+
+  def down
+    remove_index(:categories, name: 'unique_index_categories_on_slug')
+
+    add_index(
+      :categories,
+      'COALESCE(parent_category_id, -1), slug',
+      name: 'unique_index_categories_on_slug',
+      where: "slug != ''",
+      unique: true
+    )
+  end
+end
diff --git a/lib/slug.rb b/lib/slug.rb
index 9f846cd..ebb1773 100644
--- a/lib/slug.rb
+++ b/lib/slug.rb
@@ -6,28 +6,22 @@ module Slug
   CHAR_FILTER_REGEXP = /[:\/\?#\[\]@!\$&'\(\)\*\+,;=_\.~%\\`^\s|\{\}"<>]+/ # :/?#[]@!$&'()*+,;=_.~%\`^|{}"<>
   MAX_LENGTH = 255
 
-  def self.for(string, default = 'topic', max_length = MAX_LENGTH)
+  def self.for(string, default = 'topic', max_length = MAX_LENGTH, method: nil)
     string = string.gsub(/:([\w\-+]+(?::t\d)?):/, '') if string.present? # strip emoji strings
-
-    if SiteSetting.slug_generation_method == 'encoded'
-      max_length = 9999 # do not truncate encoded slugs
-    end
+    method = (method || SiteSetting.slug_generation_method || :ascii).to_sym
+    max_length = 9999 if method == :encoded # do not truncate encoded slugs
 
     slug =
-      case (SiteSetting.slug_generation_method || :ascii).to_sym
+      case method
       when :ascii then self.ascii_generator(string)
       when :encoded then self.encoded_generator(string)
       when :none then self.none_generator(string)
       end
+
     slug = self.prettify_slug(slug, max_length: max_length)
     (slug.blank? || slug_is_only_numbers?(slug)) ? default : slug
   end
 

[... diff too long, it was truncated ...]

GitHub sha: ec0212e5

This commit appears in #11277 which was approved by CvX and ZogStriP. It was merged by nbianca.