FIX: Ignore document length in search when ranking by relevance.

FIX: Ignore document length in search when ranking by relevance.

Considering document length in search introduced too much variance in our search results: it made certain searches better but at the same time made other searches worse. Instead, we want a more deterministic way of ranking search results so that it is easier to reason about why one post is ranked higher in search than another.

The long term plan to tackle repeated terms is to restrict the number of positions for a given lexeme in our search index.

diff --git a/config/site_settings.yml b/config/site_settings.yml
index c7f82ac..aeab2e0 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -1748,7 +1748,7 @@ backups:
 
 search:
   search_ranking_normalization:
-    default: '1'
+    default: '0'
     hidden: true
   min_search_term_length:
     client: true
diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb
index 4f3fd88..90579f8 100644
--- a/spec/components/search_spec.rb
+++ b/spec/components/search_spec.rb
@@ -402,29 +402,6 @@ describe Search do
       expect(result.blurb(reply)).to eq(expected_blurb)
     end
 
-    it 'does not allow a post with repeated words to dominate the ranking' do
-      category = Fabricate(:category_with_definition, name: "winter is coming")
-
-      post = Fabricate(:post,
-        raw: "I think winter will end soon",
-        topic: Fabricate(:topic,
-          title: "dragon john snow winter",
-          category: category
-        )
-      )
-
-      post2 = Fabricate(:post,
-        raw: "I think #{'winter' * 20} will end soon",
-        topic: Fabricate(:topic, title: "dragon john snow summer", category: category)
-      )
-
-      result = Search.execute('winter')
-
-      expect(result.posts.pluck(:id)).to eq([
-        post.id, category.topic.first_post.id, post2.id
-      ])
-    end
-
     it 'applies a small penalty to closed topic when ranking' do
       post = Fabricate(:post,
         raw: "My weekly update",
@@ -698,12 +675,12 @@ describe Search do
         expect(search.posts.map(&:id)).to eq([
           child_of_ignored_category.topic.first_post,
           category.topic.first_post,
-          post,
-          post2
+          post2,
+          post
         ].map(&:id))
 
         search = Search.execute("snow")
-        expect(search.posts).to eq([post, post2])
+        expect(search.posts.map(&:id)).to eq([post2.id, post.id])
 
         category.set_permissions({})
         category.save

GitHub sha: 6385fbbf

1 Like