PERF: Do not enqueue digest emails when attempted recently (#10849)

PERF: Do not enqueue digest emails when attempted recently (#10849)

Previously, Jobs::EnqueueDigestEmails would enqueue a digest job for every user, even if there are no topics to send. The digest job would exit, no email would send, and last_emailed_at would not change. 30 minutes later, Jobs::EnqueueDigestEmails would run again and re-enqueue jobs for the same users.

120fa8ad introduced a temporary mitigation for this issue, by randomly selecting a subset of those users each time.

This commit adds a new digest_attempted_at column to the user_stats table. This column is updated every time a digest job completes for a user. Using this, we can avoid scheduling digest jobs for the same user every 30 minutes. This also removes the random user selection in 120fa8ad, and instead prioritizes users who had digests attempted the longest time ago.

diff --git a/app/jobs/regular/user_email.rb b/app/jobs/regular/user_email.rb
index f9d9fd6..191d16c 100644
--- a/app/jobs/regular/user_email.rb
+++ b/app/jobs/regular/user_email.rb
@@ -22,6 +22,15 @@ module Jobs
       # of extra work when emails are disabled.
       return if quit_email_early?
 
+      send_user_email(args)
+
+      if args[:user_id].present? && args[:type] == :digest
+        # Record every attempt at sending a digest email, even if it was skipped
+        UserStat.where(user_id: args[:user_id]).update_all(digest_attempted_at: Time.zone.now)
+      end
+    end
+
+    def send_user_email(args)
       post = nil
       notification = nil
       type = args[:type]
diff --git a/app/jobs/scheduled/enqueue_digest_emails.rb b/app/jobs/scheduled/enqueue_digest_emails.rb
index d0fba28..4496f25 100644
--- a/app/jobs/scheduled/enqueue_digest_emails.rb
+++ b/app/jobs/scheduled/enqueue_digest_emails.rb
@@ -6,13 +6,9 @@ module Jobs
     every 30.minutes
 
     def execute(args)
-      return if SiteSetting.disable_digest_emails? || SiteSetting.private_email?
+      return if SiteSetting.disable_digest_emails? || SiteSetting.private_email? || SiteSetting.disable_emails == 'yes'
       users = target_user_ids
 
-      if users.length > GlobalSetting.max_digests_enqueued_per_30_mins_per_site
-        users = users.shuffle[0...GlobalSetting.max_digests_enqueued_per_30_mins_per_site]
-      end
-
       users.each do |user_id|
         ::Jobs.enqueue(:user_email, type: :digest, user_id: user_id)
       end
@@ -30,12 +26,16 @@ module Jobs
         .where("user_stats.bounce_score < #{SiteSetting.bounce_score_threshold}")
         .where("user_emails.primary")
         .where("COALESCE(last_emailed_at, '2010-01-01') <= CURRENT_TIMESTAMP - ('1 MINUTE'::INTERVAL * user_options.digest_after_minutes)")
+        .where("COALESCE(user_stats.digest_attempted_at, '2010-01-01') <= CURRENT_TIMESTAMP - ('1 MINUTE'::INTERVAL * user_options.digest_after_minutes)")
         .where("COALESCE(last_seen_at, '2010-01-01') <= CURRENT_TIMESTAMP - ('1 MINUTE'::INTERVAL * user_options.digest_after_minutes)")
         .where("COALESCE(last_seen_at, '2010-01-01') >= CURRENT_TIMESTAMP - ('1 DAY'::INTERVAL * #{SiteSetting.suppress_digest_email_after_days})")
+        .order("user_stats.digest_attempted_at ASC NULLS FIRST")
 
       # If the site requires approval, make sure the user is approved
       query = query.where("approved OR moderator OR admin") if SiteSetting.must_approve_users?
 
+      query = query.limit(GlobalSetting.max_digests_enqueued_per_30_mins_per_site)
+
       query.pluck(:id)
     end
 
diff --git a/app/models/user_stat.rb b/app/models/user_stat.rb
index bc2a9d2..364b4a2 100644
--- a/app/models/user_stat.rb
+++ b/app/models/user_stat.rb
@@ -290,4 +290,5 @@ end
 #  flags_ignored            :integer          default(0), not null
 #  first_unread_at          :datetime         not null
 #  distinct_badge_count     :integer          default(0), not null
+#  digest_attempted_at      :datetime
 #
diff --git a/db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb b/db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb
new file mode 100644
index 0000000..496e69b
--- /dev/null
+++ b/db/migrate/20201007124955_add_digest_attempted_at_to_user_stats.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class AddDigestAttemptedAtToUserStats < ActiveRecord::Migration[6.0]
+  def change
+    add_column :user_stats, :digest_attempted_at, :timestamp
+  end
+end
diff --git a/spec/jobs/enqueue_digest_emails_spec.rb b/spec/jobs/enqueue_digest_emails_spec.rb
index 97ab4e3..b207d84 100644
--- a/spec/jobs/enqueue_digest_emails_spec.rb
+++ b/spec/jobs/enqueue_digest_emails_spec.rb
@@ -122,16 +122,29 @@ describe Jobs::EnqueueDigestEmails do
     let(:user) { Fabricate(:user) }
 
     it "limits jobs enqueued per max_digests_enqueued_per_30_mins_per_site" do
-      Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago)
-      Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago)
+      user1 = Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago)
+      user2 = Fabricate(:user, last_seen_at: 2.months.ago, last_emailed_at: 2.months.ago)
+
+      user1.user_stat.update(digest_attempted_at: 2.week.ago)
+      user2.user_stat.update(digest_attempted_at: 3.weeks.ago)
 
       global_setting :max_digests_enqueued_per_30_mins_per_site, 1
 
-      # I don't love fakes, but no point sending this fake email
-      Sidekiq::Testing.fake! do
-        expect do
-          Jobs::EnqueueDigestEmails.new.execute(nil)
-        end.to change(Jobs::UserEmail.jobs, :size).by (1)
+      expect_enqueued_with(job: :user_email, args: { type: :digest, user_id: user2.id }) do
+        expect { Jobs::EnqueueDigestEmails.new.execute(nil) }.to change(Jobs::UserEmail.jobs, :size).by (1)
+      end
+
+      # The job didn't actually run, so fake the user_stat update
+      user2.user_stat.update(digest_attempted_at: Time.zone.now)
+
+      expect_enqueued_with(job: :user_email, args: { type: :digest, user_id: user1.id }) do
+        expect { Jobs::EnqueueDigestEmails.new.execute(nil) }.to change(Jobs::UserEmail.jobs, :size).by (1)
+      end
+
+      user1.user_stat.update(digest_attempted_at: Time.zone.now)
+
+      expect_not_enqueued_with(job: :user_email) do
+        Jobs::EnqueueDigestEmails.new.execute(nil)
       end
     end
 
diff --git a/spec/jobs/user_email_spec.rb b/spec/jobs/user_email_spec.rb
index 4a2b40b..8caff3c 100644
--- a/spec/jobs/user_email_spec.rb
+++ b/spec/jobs/user_email_spec.rb
@@ -42,17 +42,20 @@ describe Jobs::UserEmail do
 
     context 'not emailed recently' do
       before do
+        freeze_time
         user.update!(last_emailed_at: 8.days.ago)
       end
 
       it "calls the mailer when the user exists" do
         Jobs::UserEmail.new.execute(type: :digest, user_id: user.id)
         expect(ActionMailer::Base.deliveries).to_not be_empty
+        expect(user.user_stat.reload.digest_attempted_at).to eq_time(Time.zone.now)
       end
     end
 
     context 'recently emailed' do
       before do
+        freeze_time
         user.update!(last_emailed_at: 2.hours.ago)
         user.user_option.update!(digest_after_minutes: 1.day.to_i / 60)
       end
@@ -60,6 +63,7 @@ describe Jobs::UserEmail do
       it 'skips sending digest email' do
         Jobs::UserEmail.new.execute(type: :digest, user_id: user.id)
         expect(ActionMailer::Base.deliveries).to eq([])
+        expect(user.user_stat.reload.digest_attempted_at).to eq_time(Time.zone.now)
       end
     end
   end

GitHub sha: c0293339

This commit appears in #10849 which was approved by CvX. It was merged by davidtaylorhq.