FIX: Limit PG headline based search blurb generation to 200 characters.

FIX: Limit PG headline based search blurb generation to 200 characters.

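  • Also restores the omission marker ('...') at the start and/or end of the blurb when the headline does not begin or end where the post's raw data does.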
diff --git a/lib/search.rb b/lib/search.rb
index 16b813d..c2b4a13 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -1177,8 +1177,28 @@ class Search
         .joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
         .joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
         .select(
-          "TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS topic_title_headline",
-          "TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
+          "TS_HEADLINE(
+            #{ts_config},
+            t1.fancy_title,
+            PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
+            'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
+          ) AS topic_title_headline",
+          "TS_HEADLINE(
+            #{ts_config},
+            LEFT(
+              TS_HEADLINE(
+                #{ts_config},
+                LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}),
+                PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
+                'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel='''''
+              ),
+              #{Search::GroupedSearchResults::BLURB_LENGTH}
+            ),
+            PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
+            'HighlightAll=true, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
+          ) AS headline",
+          "LEFT(pd.raw_data, 50) AS leading_raw_data",
+          "RIGHT(pd.raw_data, 50) AS trailing_raw_data",
           default_scope.arel.projections
         )
     else
diff --git a/lib/search/grouped_search_results.rb b/lib/search/grouped_search_results.rb
index cac0c52..470a732 100644
--- a/lib/search/grouped_search_results.rb
+++ b/lib/search/grouped_search_results.rb
@@ -78,6 +78,9 @@ class Search
       end
     end
 
+    OMISSION = '...'
+    SCRUB_HEADLINE_REGEXP = /<span(?: \w+="[^"]+")* class="#{Search::HIGHLIGHT_CSS_CLASS}"(?: \w+="[^"]+")*>([^<]*)<\/span>/
+
     def blurb(post)
       opts = {
         term: @blurb_term,
@@ -86,7 +89,10 @@ class Search
 
       if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
         if SiteSetting.use_pg_headlines_for_excerpt
-          return post.headline
+          scrubbed_headline = post.headline.gsub(SCRUB_HEADLINE_REGEXP, '\1')
+          prefix_omission = scrubbed_headline.start_with?(post.leading_raw_data) ? '' : OMISSION
+          postfix_omission = scrubbed_headline.end_with?(post.trailing_raw_data) ? '' : OMISSION
+          return "#{prefix_omission}#{post.headline}#{postfix_omission}"
         else
           opts[:cooked] = post.post_search_data.raw_data
           opts[:scrub] = false
diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb
index 2588bc2..3afe2c8 100644
--- a/spec/components/search_spec.rb
+++ b/spec/components/search_spec.rb
@@ -410,7 +410,7 @@ describe Search do
     end
 
     let(:expected_blurb) do
-      "hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"search-highlight\">elephant</span>"
+      "#{Search::GroupedSearchResults::OMISSION}hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">elephant</span>"
     end
 
     it 'returns the post' do
@@ -429,7 +429,7 @@ describe Search do
       expect(post.topic_title_headline).to eq(topic.fancy_title)
     end
 
-    it "it limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
+    it "only applies highlighting to the first #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
       SiteSetting.use_pg_headlines_for_excerpt = true
 
       reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")
@@ -443,6 +443,20 @@ describe Search do
       expect(post.headline.include?('elephant')).to eq(false)
     end
 
+    it "limits the search headline to #{Search::GroupedSearchResults::BLURB_LENGTH} characters" do
+      SiteSetting.use_pg_headlines_for_excerpt = true
+
+      reply.update!(raw: "#{'a' * Search::GroupedSearchResults::BLURB_LENGTH} elephant")
+
+      result = Search.execute('elephant')
+
+      expect(result.posts.map(&:id)).to contain_exactly(reply.id)
+
+      post = result.posts.first
+
+      expect(result.blurb(post)).to eq("#{'a' * Search::GroupedSearchResults::BLURB_LENGTH}#{Search::GroupedSearchResults::OMISSION}")
+    end
+
     it 'returns the right post and blurb for searches with phrase' do
       SiteSetting.use_pg_headlines_for_excerpt = true
 

GitHub sha: 93f8396b