FIX: Improve `Topic.similar_to` with better `Topic#title` matches.

FIX: Improve Topic.similar_to with better Topic#title matches.

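This changes the PostgreSQL full-text search query so that the given title is only matched against lexemes that are formed from the title. Likewise, the given raw will only be matched against lexemes that are formed from the post’s raw. Previously, the title and raw were concatenated into a single query with no weight filter, so either one could match lexemes from any part of the indexed search data.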

diff --git a/app/models/topic.rb b/app/models/topic.rb
index de3034e..883a35c 100644
--- a/app/models/topic.rb
+++ b/app/models/topic.rb
@@ -581,9 +581,17 @@ class Topic < ActiveRecord::Base
     return [] if title.blank?
     raw = raw.presence || ""
 
-    search_data = "#{title} #{raw[0...MAX_SIMILAR_BODY_LENGTH]}".strip
-    filter_words = Search.prepare_data(search_data)
-    ts_query = Search.ts_query(term: filter_words, joiner: "|")
+    title_tsquery = Search.set_tsquery_weight_filter(
+      Search.prepare_data(title.strip),
+      'A'
+    )
+
+    raw_tsquery =  Search.set_tsquery_weight_filter(
+      Search.prepare_data(raw[0...MAX_SIMILAR_BODY_LENGTH].strip),
+      'B'
+    )
+
+    tsquery = Search.to_tsquery(term: "#{title_tsquery} & #{raw_tsquery}", joiner: "|")
 
     candidates = Topic
       .visible
@@ -591,9 +599,9 @@ class Topic < ActiveRecord::Base
       .secured(Guardian.new(user))
       .joins("JOIN topic_search_data s ON topics.id = s.topic_id")
       .joins("LEFT JOIN categories c ON topics.id = c.topic_id")
-      .where("search_data @@ #{ts_query}")
+      .where("search_data @@ #{tsquery}")
       .where("c.topic_id IS NULL")
-      .order("ts_rank(search_data, #{ts_query}) DESC")
+      .order("ts_rank(search_data, #{tsquery}) DESC")
       .limit(SiteSetting.max_similar_results * 3)
 
     candidate_ids = candidates.pluck(:id)
diff --git a/lib/search.rb b/lib/search.rb
index 02462cd..ade1ee3 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -1025,13 +1025,25 @@ class Search
   end
 
   def self.ts_query(term: , ts_config:  nil, joiner: nil, weight_filter: nil)
+    to_tsquery(
+      ts_config: ts_config,
+      term: set_tsquery_weight_filter(term, weight_filter),
+      joiner: joiner
+    )
+  end
+
+  def self.to_tsquery(ts_config: nil, term:, joiner: nil)
     ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config
-    term = term.gsub("'", "''")
-    tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '''#{PG::Connection.escape_string(term)}'':*#{weight_filter}')"
+    tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '#{term}')"
     tsquery = "REPLACE(#{tsquery}::text, '&', '#{PG::Connection.escape_string(joiner)}')::tsquery" if joiner
     tsquery
   end
 
+  def self.set_tsquery_weight_filter(term, weight_filter)
+    term = term.gsub("'", "''")
+    "''#{PG::Connection.escape_string(term)}'':*#{weight_filter}"
+  end
+
   def ts_query(ts_config = nil, weight_filter: nil)
     @ts_query_cache ||= {}
     @ts_query_cache["#{ts_config || default_ts_config} #{@term} #{weight_filter}"] ||=
diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb
index b8be9f3..29af224 100644
--- a/spec/models/topic_spec.rb
+++ b/spec/models/topic_spec.rb
@@ -502,37 +502,46 @@ describe Topic do
     end
   end
 
-  context 'similar_to' do
+  context '.similar_to' do
+    fab!(:category) { Fabricate(:category_with_definition) }
 
-    it 'returns blank with nil params' do
-      expect(Topic.similar_to(nil, nil)).to be_blank
+    it 'returns an empty array with nil params' do
+      expect(Topic.similar_to(nil, nil)).to eq([])
     end
 
     context "with a category definition" do
-      let!(:category) { Fabricate(:category_with_definition) }
-
       it "excludes the category definition topic from similar_to" do
-        expect(Topic.similar_to('category definition for', "no body")).to be_blank
+        expect(Topic.similar_to('category definition for', "no body")).to eq([])
       end
     end
 
     context 'with a similar topic' do
-      let!(:topic) {
+      fab!(:post) {
         SearchIndexer.enable
-        post = create_post(title: "Evil trout is the dude who posted this topic")
-        post.topic
+        create_post(title: "Evil trout is the dude who posted this topic")
       }
 
+      let(:topic) { post.topic }
+
+      before do
+        SearchIndexer.enable
+      end
+
       it 'returns the similar topic if the title is similar' do
         expect(Topic.similar_to("has evil trout made any topics?", "i am wondering has evil trout made any topics?")).to eq([topic])
       end
 
-      context "secure categories" do
-        fab!(:category) { Fabricate(:category_with_definition, read_restricted: true) }
+      it 'matches title against title and raw against raw when searching for topics' do
+        topic.update!(title: '1 2 3 numbered titles')
+        post.update!(raw: 'random toy poodle')
 
+        expect(Topic.similar_to("unrelated term", "1 2 3 poddle")).to eq([])
+      end
+
+      context "secure categories" do
         before do
-          topic.category = category
-          topic.save
+          category.update!(read_restricted: true)
+          topic.update!(category: category)
         end
 
         it "doesn't return topics from private categories" do

GitHub sha: 597d542c