PERF: Create a partial regular post_search_data index on large sites.

PERF: Create a partial regular post_search_data index on large sites.

With the addition of PostSearchData#private_message, a partial index consisting of only search data from regular posts can be created. The partial index helps to speed up searches on large sites since PG will not have to do an index scan on the entire search data index which has shown to be a bottle neck.

diff --git a/app/jobs/scheduled/create_recent_post_search_indexes.rb b/app/jobs/scheduled/create_recent_post_search_indexes.rb
new file mode 100644
index 0000000..14fb311
--- /dev/null
+++ b/app/jobs/scheduled/create_recent_post_search_indexes.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+module Jobs
+  class CreateRecentPostSearchIndexes < ::Jobs::Scheduled
+    every 1.day
+
+    REGULAR_POST_SEARCH_DATA_INDEX_NAME = 'idx_recent_regular_post_search_data'
+
+    def execute(_)
+      create_recent_regular_post_search_index
+    end
+
+    private
+
+    def create_recent_regular_post_search_index
+      if !PostSearchData.where(private_message: false).offset(SiteSetting.search_enable_recent_regular_posts_offset_size - 1).limit(1).exists?
+        return
+      end
+
+      SiteSetting.search_recent_regular_posts_offset_post_id = self.regular_offset_post_id
+
+      DB.exec(<<~SQL)
+      DROP INDEX IF EXISTS temp_idx_recent_regular_post_search_data;
+      SQL
+
+      DB.exec(<<~SQL, post_id: SiteSetting.search_recent_regular_posts_offset_post_id)
+      CREATE INDEX #{Rails.env.test? ? '' : 'CONCURRENTLY'} temp_idx_recent_regular_post_search_data
+      ON post_search_data USING GIN(search_data)
+      WHERE NOT private_message AND post_id >= :post_id
+      SQL
+
+      DB.exec(<<~SQL)
+      #{Rails.env.test? ? '' : "BEGIN;"}
+      DROP INDEX IF EXISTS #{REGULAR_POST_SEARCH_DATA_INDEX_NAME};
+      ALTER INDEX temp_idx_recent_regular_post_search_data RENAME TO #{REGULAR_POST_SEARCH_DATA_INDEX_NAME};
+      #{Rails.env.test? ? '' : "COMMIT;"}
+      SQL
+    end
+
+    def regular_offset_post_id
+      PostSearchData
+        .order('post_id DESC')
+        .where(private_message: false)
+        .offset(SiteSetting.search_recent_posts_size - 1)
+        .limit(1)
+        .pluck(:post_id)
+        .first
+    end
+  end
+end
diff --git a/config/site_settings.yml b/config/site_settings.yml
index e33e84e..93cc9c5 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -1824,6 +1824,12 @@ search:
   search_recent_posts_size:
     default: 100000
     max: 100000
+  search_recent_regular_posts_offset_post_id:
+    default: 0
+    hidden: true
+  search_enable_recent_regular_posts_offset_size:
+    default: 200000
+    hidden: true
   log_search_queries:
     client: true
     default: true
diff --git a/lib/search.rb b/lib/search.rb
index 94f4777..9180e1a 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -1090,7 +1090,18 @@ class Search
       type_filter: opts[:type_filter]
     }
 
-    min_id = Search.min_post_id
+    min_id =
+      if SiteSetting.search_recent_regular_posts_offset_post_id > 0
+        if %w{all_topics private_message}.include?(opts[:type_filter])
+          0
+        else
+          SiteSetting.search_recent_regular_posts_offset_post_id
+        end
+      else
+        # This is kept around for backwards compatibility.
+        # TODO: Drop this code path after Discourse 2.7 has been released.
+        Search.min_post_id
+      end
 
     if @order == :likes
       # likes are a pain to aggregate so skip
diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb
index 57d528e..06eb0f5 100644
--- a/spec/components/search_spec.rb
+++ b/spec/components/search_spec.rb
@@ -543,27 +543,24 @@ describe Search do
       ])
     end
 
-    it "allows the configuration of search to prefer recent posts" do
-      SiteSetting.search_prefer_recent_posts = true
-      SiteSetting.search_recent_posts_size = 1
-      post = Fabricate(:post, topic: topic, raw: "this is a play post")
+    it "is able to search with an offset when configured" do
+      post_1 = Fabricate(:post, raw: "this is a play post")
+      SiteSetting.search_recent_regular_posts_offset_post_id = post_1.id + 1
 
       results = Search.execute('play post')
 
-      expect(results.posts.map(&:id)).to eq([
-        post.id
-      ])
+      expect(results.posts).to eq([post_1])
+
+      post_2 = Fabricate(:post, raw: "this is another play post")
 
-      post2 = Fabricate(:post, raw: "this is a play post")
+      SiteSetting.search_recent_regular_posts_offset_post_id = post_2.id
 
       results = Search.execute('play post')
 
       expect(results.posts.map(&:id)).to eq([
-        post2.id,
-        post.id
+        post_2.id,
+        post_1.id
       ])
-    ensure
-      Discourse.cache.clear
     end
 
     it 'allows staff to search for whispers' do
diff --git a/spec/jobs/create_recent_post_search_indexes_spec.rb b/spec/jobs/create_recent_post_search_indexes_spec.rb
new file mode 100644
index 0000000..dbe2e3e
--- /dev/null
+++ b/spec/jobs/create_recent_post_search_indexes_spec.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+describe Jobs::CreateRecentPostSearchIndexes do
+  subject { described_class.new }
+
+  fab!(:post) do
+    SearchIndexer.enable
+    Fabricate(:post)
+  end
+
+  fab!(:post_2) do
+    SearchIndexer.enable
+    Fabricate(:post)
+  end
+
+  before do
+    SearchIndexer.enable
+  end
+
+  describe '#execute' do
+    it 'should not create the index if requried posts size has not been reached' do
+      SiteSetting.search_recent_posts_size = 1
+      SiteSetting.search_enable_recent_regular_posts_offset_size = 3
+
+      expect do
+        subject.execute({})
+      end.to_not change { SiteSetting.search_recent_regular_posts_offset_post_id }
+    end
+
+    it 'should create the right index' do
+      SiteSetting.search_recent_posts_size = 1
+      SiteSetting.search_enable_recent_regular_posts_offset_size = 1
+
+      subject.execute({})
+
+      expect(SiteSetting.search_recent_regular_posts_offset_post_id).to eq(post_2.id)
+
+      expect(DB.query_single(<<~SQL).first).to eq(1)
+      SELECT 1 FROM pg_indexes WHERE indexname = '#{described_class::REGULAR_POST_SEARCH_DATA_INDEX_NAME}'
+      SQL
+    end
+  end
+end

GitHub sha: 40c6d90d

This commit appears in #10504 which was approved by eviltrout. It was merged by tgxworld.