FIX: Ensure same entities are not checked twice (#22)

FIX: Ensure same entities are not checked twice (#22)

Multiple instances of the same scheduled job can run at the same time if the execution time of an instance is longer than the schedule interval.

The list of checked entities is fetched when the job begins which combined with multiple instances resulted in the same entities being checked multiple times.

diff --git a/jobs/regular/check_akismet_post.rb b/jobs/regular/check_akismet_post.rb
index 0b5e06c..6dec5c1 100644
--- a/jobs/regular/check_akismet_post.rb
+++ b/jobs/regular/check_akismet_post.rb
@@ -2,18 +2,16 @@
 
 module Jobs
   class CheckAkismetPost < ::Jobs::Base
-
-    # Check a single post for spam. We do this for TL0 to get a faster response
-    # without batching.
     def execute(args)
       return unless SiteSetting.akismet_enabled?
-
       return unless post = Post.find_by(id: args[:post_id], user_deleted: false)
-
       return if Reviewable.exists?(target: post)
 
-      client = Akismet::Client.build_client
-      DiscourseAkismet::PostsBouncer.new.perform_check(client, post)
+      DistributedMutex.synchronize("akismet_post_#{post.id}") do
+        if post.custom_fields[DiscourseAkismet::Bouncer::AKISMET_STATE] == 'new'
+          DiscourseAkismet::PostsBouncer.new.perform_check(Akismet::Client.build_client, post)
+        end
+      end
     end
   end
 end
diff --git a/jobs/regular/check_akismet_user.rb b/jobs/regular/check_akismet_user.rb
new file mode 100644
index 0000000..6cc591f
--- /dev/null
+++ b/jobs/regular/check_akismet_user.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Jobs
+  class CheckAkismetUser < ::Jobs::Base
+    def execute(args)
+      return unless SiteSetting.akismet_enabled?
+      return unless user = User.includes(:user_profile).find_by(id: args[:user_id])
+      return if Reviewable.exists?(target: user)
+
+      DistributedMutex.synchronize("akismet_user_#{user.id}") do
+        if user.custom_fields[DiscourseAkismet::Bouncer::AKISMET_STATE] == 'new'
+          DiscourseAkismet::UsersBouncer.new.perform_check(Akismet::Client.build_client, user)
+        end
+      end
+    end
+  end
+end
diff --git a/jobs/regular/check_users_for_spam.rb b/jobs/regular/check_users_for_spam.rb
deleted file mode 100644
index 2b74aa7..0000000
--- a/jobs/regular/check_users_for_spam.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-# frozen_string_literal: true
-
-module Jobs
-  class CheckUsersForSpam < ::Jobs::Base
-    def execute(args)
-      user = User.includes(:user_profile).find_by(id: args[:user_id])
-      return if user.nil?
-
-      client = Akismet::Client.build_client
-      DiscourseAkismet::UsersBouncer.new.perform_check(client, user)
-    end
-  end
-end
diff --git a/jobs/scheduled/check_for_spam_posts.rb b/jobs/scheduled/check_for_spam_posts.rb
index 19fa923..0ef9b5c 100644
--- a/jobs/scheduled/check_for_spam_posts.rb
+++ b/jobs/scheduled/check_for_spam_posts.rb
@@ -8,19 +8,18 @@ module Jobs
       return unless SiteSetting.akismet_enabled?
       return if SiteSetting.akismet_api_key.blank?
 
-      # Users above TL0 are checked in batches
-      to_check = DiscourseAkismet::PostsBouncer.to_check
-        .includes(post: :user)
-        .where('users.trust_level > 0')
-        .where('posts.user_deleted = false').map(&:post)
-
-      spam_count = 0
       bouncer = DiscourseAkismet::PostsBouncer.new
       client = Akismet::Client.build_client
+      spam_count = 0
 
-      [to_check].flatten.each do |post|
-        result = bouncer.perform_check(client, post)
-        spam_count += 1 if result
+      DiscourseAkismet::PostsBouncer.to_check
+        .where(user_deleted: false)
+        .find_each do |post|
+        DistributedMutex.synchronize("akismet_post_#{post.id}") do
+          if post.custom_fields[DiscourseAkismet::Bouncer::AKISMET_STATE] == 'new'
+            spam_count += 1 if bouncer.perform_check(client, post)
+          end
+        end
       end
 
       # Trigger an event that akismet found spam. This allows people to
diff --git a/jobs/scheduled/check_for_spam_users.rb b/jobs/scheduled/check_for_spam_users.rb
index 3e3856c..049a81e 100644
--- a/jobs/scheduled/check_for_spam_users.rb
+++ b/jobs/scheduled/check_for_spam_users.rb
@@ -2,16 +2,23 @@
 
 module Jobs
   class CheckForSpamUsers < ::Jobs::Scheduled
-    every 20.minutes
+    every 10.minutes
 
     def execute(args)
       return unless SiteSetting.akismet_enabled?
       return if SiteSetting.akismet_api_key.blank?
+
       bouncer = DiscourseAkismet::UsersBouncer.new
       client = Akismet::Client.build_client
 
-      DiscourseAkismet::UsersBouncer.to_check.includes(:user_profile).find_each do |user|
-        bouncer.perform_check(client, user)
+      DiscourseAkismet::UsersBouncer.to_check
+        .includes(:user_profile)
+        .find_each do |user|
+        DistributedMutex.synchronize("akismet_user_#{user.id}") do
+          if user.custom_fields[DiscourseAkismet::Bouncer::AKISMET_STATE] == 'new'
+            bouncer.perform_check(client, user)
+          end
+        end
       end
     end
   end
diff --git a/lib/discourse_akismet/posts_bouncer.rb b/lib/discourse_akismet/posts_bouncer.rb
index 3299143..4a93775 100644
--- a/lib/discourse_akismet/posts_bouncer.rb
+++ b/lib/discourse_akismet/posts_bouncer.rb
@@ -13,13 +13,14 @@ module DiscourseAkismet
     @@munger = nil
 
     def self.to_check
-      PostCustomField.where(name: 'AKISMET_STATE', value: 'new')
-        .where('posts.id IS NOT NULL')
-        .where('topics.id IS NOT NULL')
+      Post
+        .joins('INNER JOIN post_custom_fields ON posts.id = post_custom_fields.post_id')
         .joins('LEFT OUTER JOIN reviewables ON reviewables.target_id = post_custom_fields.post_id')
+        .where('post_custom_fields.name = ?', AKISMET_STATE)
+        .where('post_custom_fields.value = ?', 'new')
         .where('reviewables.id IS NULL')
-        .includes(post: :topic)
-        .references(:post, :topic)
+        .includes(:topic)
+        .references(:topic)
     end
 
     def suspect?(post)
diff --git a/lib/discourse_akismet/users_bouncer.rb b/lib/discourse_akismet/users_bouncer.rb
index c2e235a..13ee9ae 100644
--- a/lib/discourse_akismet/users_bouncer.rb
+++ b/lib/discourse_akismet/users_bouncer.rb
@@ -44,7 +44,7 @@ module DiscourseAkismet
     private
 
     def enqueue_job(user)
-      Jobs.enqueue(:check_users_for_spam, user_id: user.id)
+      Jobs.enqueue(:check_akismet_user, user_id: user.id)
     end
 
     def before_check(user)
diff --git a/plugin.rb b/plugin.rb
index 75e3630..58dcc1e 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -18,13 +18,13 @@ register_asset "stylesheets/akismet-icon.scss"
 
 after_initialize do
   %W[
+    jobs/regular/check_akismet_post
+    jobs/regular/check_akismet_user
+    jobs/regular/confirm_akismet_flagged_posts
+    jobs/regular/update_akismet_status
     jobs/scheduled/check_for_spam_posts
     jobs/scheduled/check_for_spam_users
     jobs/scheduled/clean_old_akismet_custom_fields
-    jobs/regular/check_users_for_spam
-    jobs/regular/confirm_akismet_flagged_posts
-    jobs/regular/check_akismet_post
-    jobs/regular/update_akismet_status
     models/reviewable_akismet_post
     models/reviewable_akismet_user
     serializers/reviewable_akismet_post_serializer
diff --git a/spec/lib/posts_bouncer_spec.rb b/spec/lib/posts_bouncer_spec.rb
index 6a6f844..59e47ec 100644
--- a/spec/lib/posts_bouncer_spec.rb
+++ b/spec/lib/posts_bouncer_spec.rb
@@ -192,7 +192,7 @@ describe DiscourseAkismet::PostsBouncer do
     it 'retrieves posts waiting to be reviewed by Akismet' do
       subject.move_to_state(post, 'new')
 
-      posts_to_check = described_class.to_check.map(&:post)
+      posts_to_check = described_class.to_check
 
       expect(posts_to_check).to contain_exactly(post)
     end
diff --git a/spec/lib/users_bouncer_spec.rb b/spec/lib/users_bouncer_spec.rb
index 9343b71..52c62ae 100644
--- a/spec/lib/users_bouncer_spec.rb
+++ b/spec/lib/users_bouncer_spec.rb
@@ -82,7 +82,7 @@ RSpec.describe DiscourseAkismet::UsersBouncer do
       expect {
         subject.enqueue_for_check(user)
       }.to change {
-        Jobs::CheckUsersForSpam.jobs.size
+        Jobs::CheckAkismetUser.jobs.size
       }.by(1)
     end
   end
diff --git a/spec/models/user_profile_spec.rb b/spec/models/user_profile_spec.rb
index a7fbe81..85b8977 100644

[... diff too long, it was truncated ...]

GitHub sha: 75872e7e

This commit appears in #22 which was approved by ZogStriP. It was merged by udan11.