FIX: Reduce mentions in blurbs to @username or @groupname

FIX: Reduce mentions in blurbs to @username or @groupname

The link to the user profile or group is useless in a blurb, and the URL-encoded username or group name looks awful for Unicode names.

diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index 88aec29..b3c8a8b 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -213,7 +213,9 @@ class SearchIndexer
       end
 
       document.css("a[href]").each do |node|
-        node.remove_attribute("href") if node["href"] == node.text
+        if node["href"] == node.text || MENTION_CLASSES.include?(node["class"])
+          node.remove_attribute("href")
+        end
       end
 
       me = new(strip_diacritics: strip_diacritics)
@@ -221,6 +223,7 @@ class SearchIndexer
       me.scrubbed.squish
     end
 
+    MENTION_CLASSES ||= %w{mention mention-group}
     ATTRIBUTES ||= %w{alt title href data-youtube-title}
 
     def start_element(_name, attributes = [])
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index faeda4b..0bbe398 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -38,6 +38,18 @@ describe SearchIndexer do
     expect(scrubbed).to eq("http://meta.discourse.org/ link")
   end
 
+  it 'extracts @username from mentions' do
+    html = '<p><a class="mention" href="/u/%E7%8B%AE%E5%AD%90">@狮子</a> <a class="mention" href="/u/foo">@foo</a></p>'
+    scrubbed = scrub(html)
+    expect(scrubbed).to eq('@狮子 @foo')
+  end
+
+  it 'extracts @groupname from group mentions' do
+    html = '<p><a class="mention-group" href="/groups/%D0%B0%D0%B2%D1%82%D0%BE%D0%BC%D0%BE%D0%B1%D0%B8%D0%BB%D0%B8%D1%81%D1%82">@автомобилист</a></p>'
+    scrubbed = scrub(html)
+    expect(scrubbed).to eq('@автомобилист')
+  end
+
   it 'uses ignore_accent setting to strip diacritics' do
     html = "<p>HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好</p>"

GitHub sha: 71d19f6e

1 Like