PERF: Use PG headlines for blurb generation and highlighting for search.

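Adds a hidden `use_pg_headlines_for_excerpt` site setting (default: false). When it is enabled, search queries join `post_search_data` and select a `TS_HEADLINE` excerpt of `raw_data` as `headline`, and the blurb for posts whose search data is newer than `SearchIndexer::MIN_POST_REINDEX_VERSION` is that headline instead of an excerpt built in Ruby.
Matched terms in the headline are wrapped in `<span class="search-highlight">` (`Search::HIGHLIGHT_CSS_CLASS`).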

diff --git a/config/site_settings.yml b/config/site_settings.yml
index 09b886e..46699dc 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -1792,6 +1792,9 @@ backups:
     hidden: true
 
 search:
+  use_pg_headlines_for_excerpt:
+    default: false
+    hidden: true
   search_ranking_normalization:
     default: "0"
     hidden: true
diff --git a/lib/search.rb b/lib/search.rb
index 35adbf9..efc870b 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -2,6 +2,7 @@
 
 class Search
   DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/
+  HIGHLIGHT_CSS_CLASS = 'search-highlight'
 
   cattr_accessor :preloaded_topic_custom_fields
   self.preloaded_topic_custom_fields = Set.new
@@ -726,12 +727,18 @@ class Search
   def single_topic(id)
     if @opts[:restrict_to_archetype].present?
       archetype = @opts[:restrict_to_archetype] == Archetype.default ? Archetype.default : Archetype.private_message
-      post = Post.joins(:topic)
-        .where("topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1", id: id, archetype: archetype)
-        .first
+
+      post = posts_scope
+        .joins(:topic)
+        .find_by(
+          "topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1",
+          id: id,
+          archetype: archetype
+        )
     else
-      post = Post.find_by(topic_id: id, post_number: 1)
+      post = posts_scope.find_by(topic_id: id, post_number: 1)
     end
+
     return nil unless @guardian.can_see?(post)
 
     @results.add(post)
@@ -1096,7 +1103,7 @@ class Search
   def aggregate_posts(post_sql)
     return [] unless post_sql
 
-    posts_eager_loads(Post)
+    posts_scope(posts_eager_loads(Post))
       .joins("JOIN (#{post_sql}) x ON x.id = posts.topic_id AND x.post_number = posts.post_number")
       .order('row_number')
   end
@@ -1128,7 +1135,7 @@ class Search
 
   def topic_search
     if @search_context.is_a?(Topic)
-      posts = posts_eager_loads(posts_query(limit))
+      posts = posts_scope(posts_eager_loads(posts_query(limit)))
         .where('posts.topic_id = ?', @search_context.id)
 
       posts.each do |post|
@@ -1150,4 +1157,17 @@ class Search
     query.includes(topic: topic_eager_loads)
   end
 
+  def posts_scope(default_scope = Post.all)
+    if SiteSetting.use_pg_headlines_for_excerpt
+      default_scope
+        .joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
+        .select(
+          "TS_HEADLINE(#{default_ts_config}, pd.raw_data, PLAINTO_TSQUERY('#{@term.present? ? PG::Connection.escape_string(@term) : nil}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
+          default_scope.arel.projections
+        )
+    else
+      default_scope
+    end
+  end
+
 end
diff --git a/lib/search/grouped_search_results.rb b/lib/search/grouped_search_results.rb
index ba112b5..cac0c52 100644
--- a/lib/search/grouped_search_results.rb
+++ b/lib/search/grouped_search_results.rb
@@ -85,8 +85,12 @@ class Search
       }
 
       if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
-        opts[:cooked] = post.post_search_data.raw_data
-        opts[:scrub] = false
+        if SiteSetting.use_pg_headlines_for_excerpt
+          return post.headline
+        else
+          opts[:cooked] = post.post_search_data.raw_data
+          opts[:scrub] = false
+        end
       else
         opts[:cooked] = post.cooked
       end
diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb
index d0def19..5a7de3b 100644
--- a/spec/components/search_spec.rb
+++ b/spec/components/search_spec.rb
@@ -410,27 +410,31 @@ describe Search do
     end
 
     let(:expected_blurb) do
-      "...quire content longer than the typical test post raw content. It really is some long content, folks. elephant"
+      "hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"search-highlight\">elephant</span>"
     end
 
     it 'returns the post' do
+      SiteSetting.use_pg_headlines_for_excerpt = true
+
       result = Search.execute('elephant',
         type_filter: 'topic',
         include_blurbs: true
       )
 
-      expect(result.posts).to contain_exactly(reply)
-      expect(result.blurb(reply)).to eq(expected_blurb)
+      expect(result.posts.map(&:id)).to contain_exactly(reply.id)
+      expect(result.blurb(result.posts.first)).to eq(expected_blurb)
     end
 
     it 'returns the right post and blurb for searches with phrase' do
+      SiteSetting.use_pg_headlines_for_excerpt = true
+
       result = Search.execute('"elephant"',
         type_filter: 'topic',
         include_blurbs: true
       )
 
-      expect(result.posts).to contain_exactly(reply)
-      expect(result.blurb(reply)).to eq(expected_blurb)
+      expect(result.posts.map(&:id)).to contain_exactly(reply.id)
+      expect(result.blurb(result.posts.first)).to eq(expected_blurb)
     end
 
     it 'applies a small penalty to closed topic when ranking' do
diff --git a/spec/requests/search_controller_spec.rb b/spec/requests/search_controller_spec.rb
index bb49a6a..7a54ced 100644
--- a/spec/requests/search_controller_spec.rb
+++ b/spec/requests/search_controller_spec.rb
@@ -99,6 +99,8 @@ describe SearchController do
     end
 
     it "can search correctly" do
+      SiteSetting.use_pg_headlines_for_excerpt = true
+
       get "/search/query.json", params: {
         term: 'awesome'
       }
@@ -109,11 +111,11 @@ describe SearchController do
 
       expect(data['posts'].length).to eq(2)
       expect(data['posts'][0]['id']).to eq(awesome_post_2.id)
-      expect(data['posts'][0]['blurb']).to eq(awesome_post_2.raw)
+      expect(data['posts'][0]['blurb']).to eq("this is my really <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">awesome</span> post")
       expect(data['topics'][0]['id']).to eq(awesome_post_2.topic_id)
 
       expect(data['posts'][1]['id']).to eq(awesome_post.id)
-      expect(data['posts'][1]['blurb']).to eq(awesome_post.raw)
+      expect(data['posts'][1]['blurb']).to eq("this is my really <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">awesome</span> post")
       expect(data['topics'][1]['id']).to eq(awesome_post.topic_id)
     end
 

GitHub sha: 2193d024