PERF: Delete search data of posts from trashed topics periodically. (#7302)

PERF: Delete search data of posts from trashed topics periodically. (#7302)

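This keeps both the search index and the `post_search_data` table smaller. Only search data for posts whose topic was trashed at least one week ago is deleted.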

diff --git a/app/jobs/scheduled/reindex_search.rb b/app/jobs/scheduled/reindex_search.rb
index b6dfc1e..1a15ec5 100644
--- a/app/jobs/scheduled/reindex_search.rb
+++ b/app/jobs/scheduled/reindex_search.rb
@@ -66,6 +66,18 @@ module Jobs
         .joins("LEFT JOIN posts p ON p.id = post_search_data.post_id")
         .where("p.raw = ''")
         .delete_all
+
+      DB.exec(<<~SQL, deleted_at: 1.week.ago)
+        DELETE FROM post_search_data
+        WHERE post_id IN (
+          SELECT post_id
+          FROM post_search_data
+          LEFT JOIN posts ON post_search_data.post_id = posts.id
+          INNER JOIN topics ON posts.topic_id = topics.id
+          WHERE topics.deleted_at IS NOT NULL
+          AND topics.deleted_at <= :deleted_at
+        )
+      SQL
     end
 
     def load_problem_post_ids(limit)
diff --git a/spec/jobs/reindex_search_spec.rb b/spec/jobs/reindex_search_spec.rb
index 3df9fa2..df63466 100644
--- a/spec/jobs/reindex_search_spec.rb
+++ b/spec/jobs/reindex_search_spec.rb
@@ -79,15 +79,33 @@ describe Jobs::ReindexSearch do
   end
 
   describe '#execute' do
-    it "should clean up post_search_data of posts with empty raw" do
+    it(
+      "should clean up post_search_data of posts with empty raw or posts from " \
+      "trashed topics"
+    ) do
+
       post = Fabricate(:post)
       post2 = Fabricate(:post, post_type: Post.types[:small_action])
       post2.raw = ""
       post2.save!(validate: false)
+      post3 = Fabricate(:post)
+      post3.topic.trash!
+      post4 = nil
+
+      freeze_time(1.week.ago) do
+        post4 = Fabricate(:post)
+        post4.topic.trash!
+      end
+
+      expect { subject.execute({}) }.to change { PostSearchData.count }.by(-2)
 
-      expect { subject.execute({}) }.to change { PostSearchData.count }.by(-1)
-      expect(Post.all).to contain_exactly(post, post2)
-      expect(PostSearchData.all).to contain_exactly(post.post_search_data)
+      expect(Post.all.pluck(:id)).to contain_exactly(
+        post.id, post2.id, post3.id, post4.id
+      )
+
+      expect(PostSearchData.all.pluck(:post_id)).to contain_exactly(
+        post.post_search_data.post_id, post3.post_search_data.post_id
+      )
     end
   end
 end

GitHub sha: d1514253

This commit has been mentioned on Discourse Meta. There might be relevant details there: