FIX: Remove duplicate Emoji names from blurb

FIX: Remove duplicate Emoji names from blurb

The blurb contained the values of both the alt and title attributes of each Emoji image. Because the two values are always the same, the Emoji name appeared twice.

diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index b3c8a8b..e455526 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -212,6 +212,10 @@ class SearchIndexer
         end
       end
 
+      document.css("img[class='emoji']").each do |node|
+        node.remove_attribute("alt")
+      end
+
       document.css("a[href]").each do |node|
         if node["href"] == node.text || MENTION_CLASSES.include?(node["class"])
           node.remove_attribute("href")
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index 0bbe398..1639168 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -50,6 +50,12 @@ describe SearchIndexer do
     expect(scrubbed).to eq('@автомобилист')
   end
 
+  it 'extracts emoji name from emoji image' do
+    html = %Q|<img src="#{Discourse.base_url_no_prefix}/images/emoji/twitter/wink.png?v=9" title=":wink:" class="emoji" alt=":wink:">|
+    scrubbed = scrub(html)
+    expect(scrubbed).to eq(':wink:')
+  end
+
   it 'uses ignore_accent setting to strip diacritics' do
     html = "<p>HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好</p>"

GitHub sha: 876c4f20

1 Like