FIX: Various watched words improvements

FIX: Various watched words improvements

  • Client-side censoring fixed for non-chrome browsers. (Regular expression rewritten to avoid lookback)
  • Regex generation is now done on the server, to reduce repeated logic, and make it easier to extend in plugins
  • Censor tests are moved to ruby, to ensure everything works end-to-end
  • If “watched words regular expressions” is enabled, warn the admin when the generated regex is invalid
diff --git a/app/assets/javascripts/discourse/lib/text.js.es6 b/app/assets/javascripts/discourse/lib/text.js.es6
index ee51cb5..cb02036 100644
--- a/app/assets/javascripts/discourse/lib/text.js.es6
+++ b/app/assets/javascripts/discourse/lib/text.js.es6
@@ -13,7 +13,7 @@ function getOpts(opts) {
     {
       getURL: Discourse.getURLWithCDN,
       currentUser: Discourse.__container__.lookup("current-user:main"),
-      censoredWords: site.censored_words,
+      censoredRegexp: site.censored_regexp,
       siteSettings,
       formatUsername
     },
diff --git a/app/assets/javascripts/discourse/models/topic.js.es6 b/app/assets/javascripts/discourse/models/topic.js.es6
index ca2c435..d44afe9 100644
--- a/app/assets/javascripts/discourse/models/topic.js.es6
+++ b/app/assets/javascripts/discourse/models/topic.js.es6
@@ -97,7 +97,7 @@ const Topic = RestModel.extend({
   fancyTitle(title) {
     let fancyTitle = censor(
       emojiUnescape(title || ""),
-      Discourse.Site.currentProp("censored_words")
+      Discourse.Site.currentProp("censored_regexp")
     );
 
     if (Discourse.SiteSettings.support_mixed_text_direction) {
diff --git a/app/assets/javascripts/pretty-text/censored-words.js.es6 b/app/assets/javascripts/pretty-text/censored-words.js.es6
index 32c21dc..4453e3f 100644
--- a/app/assets/javascripts/pretty-text/censored-words.js.es6
+++ b/app/assets/javascripts/pretty-text/censored-words.js.es6
@@ -1,75 +1,19 @@
-function escapeRegexp(text) {
-  return text.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&").replace(/\*/g, "S*");
-}
-
-function createCensorRegexp(patterns) {
-  return new RegExp(`((?<!\\w)(?:${patterns.join("|")}))(?!\\w)`, "ig");
-}
-
-export function censorFn(
-  censoredWords,
-  replacementLetter,
-  watchedWordsRegularExpressions
-) {
-  let patterns = [];
-
-  replacementLetter = replacementLetter || "&#9632;";
-
-  if (censoredWords && censoredWords.length) {
-    patterns = censoredWords.split("|");
-    if (!watchedWordsRegularExpressions) {
-      patterns = patterns.map(t => `(${escapeRegexp(t)})`);
-    }
-  }
-
-  if (patterns.length) {
-    let censorRegexp;
-
-    try {
-      if (watchedWordsRegularExpressions) {
-        censorRegexp = new RegExp(
-          "((?:" + patterns.join("|") + "))(?![^\\(]*\\))",
-          "ig"
+export function censorFn(regexpString, replacementLetter) {
+  if (regexpString) {
+    let censorRegexp = new RegExp(regexpString, "ig");
+    replacementLetter = replacementLetter || "&#9632;";
+
+    return function(text) {
+      text = text.replace(censorRegexp, (fullMatch, ...groupMatches) => {
+        const stringMatch = groupMatches.find(g => typeof g === "string");
+        return fullMatch.replace(
+          stringMatch,
+          new Array(stringMatch.length + 1).join(replacementLetter)
         );
-      } else {
-        censorRegexp = createCensorRegexp(patterns);
-      }
-
-      if (censorRegexp) {
-        return function(text) {
-          let original = text;
-
-          try {
-            let m = censorRegexp.exec(text);
-            const fourCharReplacement = new Array(5).join(replacementLetter);
-
-            while (m && m[0]) {
-              if (m[0].length > original.length) {
-                return original;
-              } // regex is dangerous
-              if (watchedWordsRegularExpressions) {
-                text = text.replace(censorRegexp, fourCharReplacement);
-              } else {
-                const replacement = new Array(m[0].length + 1).join(
-                  replacementLetter
-                );
-                text = text.replace(
-                  createCensorRegexp([escapeRegexp(m[0])]),
-                  replacement
-                );
-              }
-              m = censorRegexp.exec(text);
-            }
+      });
 
-            return text;
-          } catch (e) {
-            return original;
-          }
-        };
-      }
-    } catch (e) {
-      // fall through
-    }
+      return text;
+    };
   }
 
   return function(t) {
@@ -77,6 +21,6 @@ export function censorFn(
   };
 }
 
-export function censor(text, censoredWords, replacementLetter) {
-  return censorFn(censoredWords, replacementLetter)(text);
+export function censor(text, censoredRegexp, replacementLetter) {
+  return censorFn(censoredRegexp, replacementLetter)(text);
 }
diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6 b/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6
index e44c036..6273a22 100644
--- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6
+++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/censored.js.es6
@@ -29,15 +29,11 @@ export function setup(helper) {
   });
 
   helper.registerPlugin(md => {
-    const words = md.options.discourse.censoredWords;
+    const censoredRegexp = md.options.discourse.censoredRegexp;
 
-    if (words && words.length > 0) {
+    if (censoredRegexp) {
       const replacement = String.fromCharCode(9632);
-      const censor = censorFn(
-        words,
-        replacement,
-        md.options.discourse.watchedWordsRegularExpressions
-      );
+      const censor = censorFn(censoredRegexp, replacement);
       md.core.ruler.push("censored", state => censorTree(state, censor));
     }
   });
diff --git a/app/assets/javascripts/pretty-text/pretty-text.js.es6 b/app/assets/javascripts/pretty-text/pretty-text.js.es6
index e656c9a..7a867d8 100644
--- a/app/assets/javascripts/pretty-text/pretty-text.js.es6
+++ b/app/assets/javascripts/pretty-text/pretty-text.js.es6
@@ -29,7 +29,7 @@ export function buildOptions(state) {
     lookupUploadUrls,
     previewing,
     linkify,
-    censoredWords,
+    censoredRegexp,
     disableEmojis
   } = state;
 
@@ -67,7 +67,7 @@ export function buildOptions(state) {
     formatUsername,
     emojiUnicodeReplacer,
     lookupUploadUrls,
-    censoredWords,
+    censoredRegexp,
     allowedHrefSchemes: siteSettings.allowed_href_schemes
       ? siteSettings.allowed_href_schemes.split("|")
       : null,
diff --git a/app/models/admin_dashboard_data.rb b/app/models/admin_dashboard_data.rb
index 0d26081..c558954 100644
--- a/app/models/admin_dashboard_data.rb
+++ b/app/models/admin_dashboard_data.rb
@@ -96,7 +96,7 @@ class AdminDashboardData
                       :image_magick_check, :failing_emails_check,
                       :subfolder_ends_in_slash_check,
                       :pop3_polling_configuration, :email_polling_errored_recently,
-                      :out_of_date_themes, :unreachable_themes
+                      :out_of_date_themes, :unreachable_themes, :watched_words_check
 
     add_problem_check do
       sidekiq_check || queue_size_check
@@ -224,6 +224,17 @@ class AdminDashboardData
     I18n.t('dashboard.force_https_warning', base_path: Discourse.base_path) unless SiteSetting.force_https
   end
 
+  def watched_words_check
+    WatchedWord.actions.keys.each do |action|
+      begin
+        WordWatcher.word_matcher_regexp(action, raise_errors: true)
+      rescue RegexpError => e
+        return I18n.t('dashboard.watched_word_regexp_error', base_path: Discourse.base_path, action: action)
+      end
+    end
+    nil
+  end
+
   def out_of_date_themes
     old_themes = RemoteTheme.out_of_date_themes
     return unless old_themes.present?
diff --git a/app/serializers/site_serializer.rb b/app/serializers/site_serializer.rb
index 1b08914..13859d7 100644
--- a/app/serializers/site_serializer.rb
+++ b/app/serializers/site_serializer.rb
@@ -30,7 +30,7 @@ class SiteSerializer < ApplicationSerializer
     :wizard_required,
     :topic_featured_link_allowed_category_ids,
     :user_themes,
-    :censored_words,
+    :censored_regexp,
     :shared_drafts_category_id
   )
 
@@ -156,8 +156,8 @@ class SiteSerializer < ApplicationSerializer
     scope.topic_featured_link_allowed_category_ids
   end
 
-  def censored_words
-    WordWatcher.words_for_action(:censor).join('|')
+  def censored_regexp

[... diff too long, it was truncated ...]

GitHub sha: 39e0442d

1 Like