FEATURE: Censor Oneboxes (#12902)

FEATURE: Censor Oneboxes (#12902)

Previously, onebox content was not run through the censor regex, so censored words could be sneaked in via a onebox.

diff --git a/app/services/word_watcher.rb b/app/services/word_watcher.rb
index 2ccc754..3669914 100644
--- a/app/services/word_watcher.rb
+++ b/app/services/word_watcher.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 class WordWatcher
+  REPLACEMENT_LETTER ||= CGI.unescape_html("■")
 
   def initialize(raw)
     @raw = raw
@@ -70,6 +71,27 @@ class WordWatcher
     "watched-words-list:#{action}"
   end
 
+  def self.censor(html)
+    regexp = WordWatcher.word_matcher_regexp(:censor)
+    return html if regexp.blank?
+    # Parse as an HTML fragment and rewrite text nodes only; other nodes are left as parsed.
+    doc = Nokogiri::HTML5::fragment(html)
+    doc.traverse do |node|
+      if node.text?
+        node.content = node.content.gsub(regexp) do |match|
+          # The match may start with whitespace (the regex captures it): keep that padding, mask the rest.
+          padding = match.size - match.lstrip.size
+          if padding > 0
+            match[0..padding - 1] + REPLACEMENT_LETTER * (match.size - padding)
+          else
+            REPLACEMENT_LETTER * match.size
+          end
+        end
+      end
+    end
+    doc.to_s
+  end
+
   def self.clear_cache!
     WatchedWord.actions.each do |a, i|
       Discourse.cache.delete word_matcher_regexp_key(a)
diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb
index 69bb14c..4a7461f 100644
--- a/lib/oneboxer.rb
+++ b/lib/oneboxer.rb
@@ -455,7 +455,10 @@ module Oneboxer
       onebox_options[:user_agent] = user_agent_override if user_agent_override
 
       r = Onebox.preview(uri.to_s, onebox_options)
-      result = { onebox: r.to_s, preview: r&.placeholder_html.to_s }
+      result = {
+        onebox: WordWatcher.censor(r.to_s),
+        preview: WordWatcher.censor(r&.placeholder_html.to_s)
+      }
 
       # NOTE: Call r.errors after calling placeholder_html
       if r.errors.any?
diff --git a/spec/components/oneboxer_spec.rb b/spec/components/oneboxer_spec.rb
index 3628e58..527f054 100644
--- a/spec/components/oneboxer_spec.rb
+++ b/spec/components/oneboxer_spec.rb
@@ -177,6 +177,29 @@ describe Oneboxer do
     expect(Oneboxer.external_onebox(url)[:onebox]).to be_present
   end
 
+  it "censors external oneboxes" do
+    Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "bad word")
+
+    url = 'https://example.com/'
+    stub_request(:any, url).to_return(status: 200, body: <<~HTML, headers: {})
+      <html>
+      <head>
+        <meta property="og:title" content="title with bad word">
+        <meta property="og:description" content="description with bad word">
+      </head>
+      <body>
+        <p>content with bad word</p>
+      </body>
+      <html>
+    HTML
+
+    onebox = Oneboxer.external_onebox(url)
+    expect(onebox[:onebox]).to include('title with')
+    expect(onebox[:onebox]).not_to include('bad word')
+    expect(onebox[:preview]).to include('title with')
+    expect(onebox[:preview]).not_to include('bad word')
+  end
+
   it "uses the Onebox custom user agent on specified hosts" do
     SiteSetting.force_custom_user_agent_hosts = "http://codepen.io|https://video.discourse.org/"
     url = 'https://video.discourse.org/presentation.mp4'

GitHub sha: d184fe59

This commit appears in #12902, which was approved by ZogStriP. It was merged by SamSaffron.