PERF: Disable Sidekiq only during database restore (#10857)

PERF: Disable Sidekiq only during database restore (#10857)

It pauses Sidekiq, clears Redis (namespaced to the current site), clears Sidekiq jobs for the current site, restores the database and unpauses Sidekiq. Previously it stayed paused until the end of the restore.

Redis is cleared because we don’t want any old data lying around (e.g. old Sidekiq jobs). Most data in Redis is prefixed with the name of the multisite, but Sidekiq jobs in a multisite are all stored in the same keys. So, deleting those jobs requires a little bit more logic.

diff --git a/lib/backup_restore/restorer.rb b/lib/backup_restore/restorer.rb
index a6aba0b..ce22488 100644
--- a/lib/backup_restore/restorer.rb
+++ b/lib/backup_restore/restorer.rb
@@ -43,13 +43,16 @@ module BackupRestore
       validate_backup_metadata
 
       @system.enable_readonly_mode
-      @system.pause_sidekiq
+      @system.pause_sidekiq("restore")
       @system.wait_for_sidekiq
+      @system.flush_redis
+      @system.clear_sidekiq_queues
 
       @database_restorer.restore(db_dump_path)
 
       reload_site_settings
 
+      @system.unpause_sidekiq
       @system.disable_readonly_mode
 
       clear_category_cache
diff --git a/lib/backup_restore/system_interface.rb b/lib/backup_restore/system_interface.rb
index fec6735..377d40a 100644
--- a/lib/backup_restore/system_interface.rb
+++ b/lib/backup_restore/system_interface.rb
@@ -54,12 +54,16 @@ module BackupRestore
       end
     end
 
-    def pause_sidekiq
+    def pause_sidekiq(reason)
+      return if Sidekiq.paused?
+
       log "Pausing sidekiq..."
-      Sidekiq.pause!
+      Sidekiq.pause!(reason)
     end
 
     def unpause_sidekiq
+      return unless Sidekiq.paused?
+
       log "Unpausing sidekiq..."
       Sidekiq.unpause!
     rescue => ex
@@ -88,6 +92,23 @@ module BackupRestore
       end
     end
 
+    def flush_redis
+      redis = Discourse.redis
+      redis.scan_each(match: "*") do |key|
+        redis.del(key) unless key == SidekiqPauser::PAUSED_KEY
+      end
+    end
+
+    def clear_sidekiq_queues
+      Sidekiq::Queue.all.each do |queue|
+        queue.each { |job| delete_job_if_it_belongs_to_current_site(job) }
+      end
+
+      Sidekiq::RetrySet.new.each { |job| delete_job_if_it_belongs_to_current_site(job) }
+      Sidekiq::ScheduledSet.new.each { |job| delete_job_if_it_belongs_to_current_site(job) }
+      Sidekiq::DeadSet.new.each { |job| delete_job_if_it_belongs_to_current_site(job) }
+    end
+
     protected
 
     def sidekiq_has_running_jobs?
@@ -100,5 +121,9 @@ module BackupRestore
 
       false
     end
+
+    def delete_job_if_it_belongs_to_current_site(job)
+      job.delete if job.args.first&.fetch("current_site_id", nil) == @current_db
+    end
   end
 end
diff --git a/spec/lib/backup_restore/system_interface_spec.rb b/spec/lib/backup_restore/system_interface_spec.rb
index d75de32..27d7759 100644
--- a/spec/lib/backup_restore/system_interface_spec.rb
+++ b/spec/lib/backup_restore/system_interface_spec.rb
@@ -10,7 +10,7 @@ describe BackupRestore::SystemInterface do
 
   context "readonly mode" do
     after do
-      Discourse::READONLY_KEYS.each { |key| $redis.del(key) }
+      Discourse::READONLY_KEYS.each { |key| Discourse.redis.del(key) }
     end
 
     describe "#enable_readonly_mode" do
@@ -81,16 +81,23 @@ describe BackupRestore::SystemInterface do
   end
 
   describe "#pause_sidekiq" do
+    after { Sidekiq.unpause! }
+
     it "calls pause!" do
-      Sidekiq.expects(:pause!).once
-      subject.pause_sidekiq
+      expect(Sidekiq.paused?).to eq(false)
+      subject.pause_sidekiq("my reason")
+      expect(Sidekiq.paused?).to eq(true)
+      expect(Discourse.redis.get(SidekiqPauser::PAUSED_KEY)).to eq("my reason")
     end
   end
 
   describe "#unpause_sidekiq" do
     it "calls unpause!" do
-      Sidekiq.expects(:unpause!).once
+      Sidekiq.pause!
+      expect(Sidekiq.paused?).to eq(true)
+
       subject.unpause_sidekiq
+      expect(Sidekiq.paused?).to eq(false)
     end
   end
 
@@ -101,12 +108,16 @@ describe BackupRestore::SystemInterface do
     end
 
     context "with Sidekiq workers" do
-      before { $redis.flushall }
-      after { $redis.flushall }
+      before { flush_sidekiq_redis_namespace }
+      after { flush_sidekiq_redis_namespace }
 
-      def create_workers(site_id: nil, all_sites: false)
-        $redis.flushall
+      def flush_sidekiq_redis_namespace
+        Sidekiq.redis do |redis|
+          redis.scan_each { |key| Discourse.redis.del(key) }
+        end
+      end
 
+      def create_workers(site_id: nil, all_sites: false)
         payload = Sidekiq::Testing.fake! do
           data = { post_id: 1 }
 
@@ -157,5 +168,46 @@ describe BackupRestore::SystemInterface do
         subject.wait_for_sidekiq
       end
     end
+
+    describe "flush_redis" do
+      context "Sidekiq" do
+        after { Sidekiq.unpause! }
+
+        it "doesn't unpause Sidekiq" do
+          Sidekiq.pause!
+          subject.flush_redis
+
+          expect(Sidekiq.paused?).to eq(true)
+        end
+      end
+
+      it "removes only keys from the current site in a multisite", type: :multisite do
+        test_multisite_connection("default") do
+          Discourse.redis.set("foo", "default-foo")
+          Discourse.redis.set("bar", "default-bar")
+
+          expect(Discourse.redis.get("foo")).to eq("default-foo")
+          expect(Discourse.redis.get("bar")).to eq("default-bar")
+        end
+
+        test_multisite_connection("second") do
+          Discourse.redis.set("foo", "second-foo")
+          Discourse.redis.set("bar", "second-bar")
+
+          expect(Discourse.redis.get("foo")).to eq("second-foo")
+          expect(Discourse.redis.get("bar")).to eq("second-bar")
+
+          subject.flush_redis
+
+          expect(Discourse.redis.get("foo")).to be_nil
+          expect(Discourse.redis.get("bar")).to be_nil
+        end
+
+        test_multisite_connection("default") do
+          expect(Discourse.redis.get("foo")).to eq("default-foo")
+          expect(Discourse.redis.get("bar")).to eq("default-bar")
+        end
+      end
+    end
   end
 end

GitHub sha: d5ef6188

This commit appears in #10857 which was approved by eviltrout. It was merged by gschlager.