FEATURE: when blocking emails prefer blocking canonical

FEATURE: when blocking emails prefer blocking canonical

Previously we relied entirely on levenshtein_distance_spammer_emails site setting to handle “similar looking” emails.

This commit improves the situation by always preferring to block (and check) canonical emails.

This means that if:

samevil+test@domain.com is blocked the system will block samevil@domain.com

This means that samevil+2@domain.com (ad infinitum) will be blocked

diff --git a/app/models/screened_email.rb b/app/models/screened_email.rb
index 73119db..3f8a604 100644
--- a/app/models/screened_email.rb
+++ b/app/models/screened_email.rb
@@ -18,11 +18,24 @@ class ScreenedEmail < ActiveRecord::Base
     self.email = email.downcase
   end
 
+  def self.canonical(email)
+    name, domain = email.split('@', 2)
+    name = name.gsub(/\+.*/, '')
+    if ['gmail.com', 'googlemail.com'].include?(domain.downcase)
+      name = name.gsub('.', '')
+    end
+    "#{name}@#{domain}".downcase
+  end
+
   def self.block(email, opts = {})
-    find_by_email(Email.downcase(email)) || create(opts.slice(:action_type, :ip_address).merge(email: email))
+    email = canonical(email)
+    find_by_email(email) || create!(opts.slice(:action_type, :ip_address).merge(email: email))
   end
 
   def self.should_block?(email)
+
+    email = canonical(email)
+
     screened_emails = ScreenedEmail.order(created_at: :desc).limit(100)
 
     distances = {}
diff --git a/spec/models/screened_email_spec.rb b/spec/models/screened_email_spec.rb
index aeefe19..2b33144 100644
--- a/spec/models/screened_email_spec.rb
+++ b/spec/models/screened_email_spec.rb
@@ -26,6 +26,7 @@ describe ScreenedEmail do
 
   describe '#block' do
     context 'email is not being blocked' do
+
       it 'creates a new record with default action of :block' do
         record = ScreenedEmail.block(email)
         expect(record).not_to be_new_record
@@ -57,6 +58,14 @@ describe ScreenedEmail do
   describe '#should_block?' do
     subject { ScreenedEmail.should_block?(email) }
 
+    it "automatically blocks via email canonicalization" do
+      SiteSetting.levenshtein_distance_spammer_emails = 0
+      ScreenedEmail.block('bad.acTor+1@gmail.com')
+      ScreenedEmail.block('bad.actOr+2@gmail.com')
+
+      expect(ScreenedEmail.should_block?('b.a.dactor@gmail.com')).to eq(true)
+    end
+
     it "returns false if a record with the email doesn't exist" do
       expect(subject).to eq(false)
     end

GitHub sha: cbceadf4

This commit has been mentioned on Discourse Meta. There might be relevant details there:

https://meta.discourse.org/t/gmail-dot-trick/22627/88