PERF: remove avg_time calculations and regular jobs from posts and topics

PERF: remove avg_time calculations and regular jobs from posts and topics

After careful analysis of large data-sets it became apparent that avg_time had no impact whatsoever on “best of” topic scoring. Calculating avg_time was a very costly operation especially on large databases.

We have some longer term plans of introducing other weighting that is read time based into our scoring for “best of” and “top” topics, but in the interim to stop a large amount of work that is not achieving any value we are removing the jobs.

Column removal will follow once we decide on a new replacement metric.

diff --git a/app/jobs/scheduled/calculate_avg_time.rb b/app/jobs/scheduled/calculate_avg_time.rb
deleted file mode 100644
index c0bc938..0000000
--- a/app/jobs/scheduled/calculate_avg_time.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-module Jobs
-  class CalculateAvgTime < Jobs::Scheduled
-    every 1.day
-
-    # PERF: these calculations can become exceedingly expnsive
-    #  they run a huge gemoetric mean and are hard to optimise
-    #  defer to only run once a day
-    def execute(args)
-      # Update the average times
-      Post.calculate_avg_time(2.days.ago)
-      Topic.calculate_avg_time(2.days.ago)
-    end
-  end
-end
diff --git a/app/jobs/scheduled/weekly.rb b/app/jobs/scheduled/weekly.rb
index 72fdb01..d76b9f3 100644
--- a/app/jobs/scheduled/weekly.rb
+++ b/app/jobs/scheduled/weekly.rb
@@ -8,8 +8,6 @@ module Jobs
     every 1.week
 
     def execute(args)
-      Post.calculate_avg_time
-      Topic.calculate_avg_time
       ScoreCalculator.new.calculate
       MiniScheduler::Stat.purge_old
       Draft.cleanup!
diff --git a/app/models/post.rb b/app/models/post.rb
index 903400c..f51146c 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -674,37 +674,6 @@ class Post < ActiveRecord::Base
 
   end
 
-  # This calculates the geometric mean of the post timings and stores it along with
-  # each post.
-  def self.calculate_avg_time(min_topic_age = nil)
-    retry_lock_error do
-      builder = DB.build("UPDATE posts
-                SET avg_time = (x.gmean / 1000)
-                FROM (SELECT post_timings.topic_id,
-                             post_timings.post_number,
-                             round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END))) AS gmean
-                      FROM post_timings
-                      INNER JOIN posts AS p2
-                        ON p2.post_number = post_timings.post_number
-                          AND p2.topic_id = post_timings.topic_id
-                          AND p2.user_id <> post_timings.user_id
-                      /*where2*/
-                      GROUP BY post_timings.topic_id, post_timings.post_number) AS x
-                /*where*/")
-
-      builder.where("x.topic_id = posts.topic_id
-                  AND x.post_number = posts.post_number
-                  AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
-
-      if min_topic_age
-        builder.where2("p2.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
-                     bumped_at: min_topic_age)
-      end
-
-      builder.exec
-    end
-  end
-
   before_save do
     self.last_editor_id ||= user_id
 
diff --git a/app/models/topic.rb b/app/models/topic.rb
index bb5a083..b8a0b18 100644
--- a/app/models/topic.rb
+++ b/app/models/topic.rb
@@ -671,31 +671,6 @@ class Topic < ActiveRecord::Base
     SQL
   end
 
-  # This calculates the geometric mean of the posts and stores it with the topic
-  def self.calculate_avg_time(min_topic_age = nil)
-    builder = DB.build <<~SQL
-      UPDATE topics
-      SET avg_time = x.gmean
-      FROM (SELECT topic_id,
-                   round(exp(avg(ln(avg_time)))) AS gmean
-            FROM posts
-            WHERE avg_time > 0 AND avg_time IS NOT NULL
-            GROUP BY topic_id) AS x
-      /*where*/
-    SQL
-
-    builder.where <<~SQL
-      x.topic_id = topics.id AND
-      (topics.avg_time <> x.gmean OR topics.avg_time IS NULL)
-    SQL
-
-    if min_topic_age
-      builder.where("topics.bumped_at > :bumped_at", bumped_at: min_topic_age)
-    end
-
-    builder.exec
-  end
-
   def changed_to_category(new_category)
     return true if new_category.blank? || Category.exists?(topic_id: id)
     return false if new_category.id == SiteSetting.uncategorized_category_id && !SiteSetting.allow_uncategorized_topics
diff --git a/app/serializers/post_serializer.rb b/app/serializers/post_serializer.rb
index 8614d0f..2e059ac 100644
--- a/app/serializers/post_serializer.rb
+++ b/app/serializers/post_serializer.rb
@@ -23,7 +23,6 @@ class PostSerializer < BasicPostSerializer
              :reply_count,
              :reply_to_post_number,
              :quote_count,
-             :avg_time,
              :incoming_link_count,
              :reads,
              :score,
diff --git a/lib/score_calculator.rb b/lib/score_calculator.rb
index 69c8173..cdb70af 100644
--- a/lib/score_calculator.rb
+++ b/lib/score_calculator.rb
@@ -6,7 +6,6 @@ class ScoreCalculator
       like_score: 15,
       incoming_link_count: 5,
       bookmark_count: 2,
-      avg_time: 0.05,
       reads: 0.2
     }
   end
diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb
index 8462970..be6cfef 100644
--- a/spec/models/post_spec.rb
+++ b/spec/models/post_spec.rb
@@ -1023,14 +1023,6 @@ describe Post do
     end
   end
 
-  describe "calculate_avg_time" do
-
-    it "should not crash" do
-      Post.calculate_avg_time
-      Post.calculate_avg_time(1.day.ago)
-    end
-  end
-
   describe "has_host_spam" do
     let(:raw) { "hello from my site http://www.example.net http://#{GlobalSetting.hostname} http://#{RailsMultisite::ConnectionManagement.current_hostname}" }
 
diff --git a/spec/models/post_timing_spec.rb b/spec/models/post_timing_spec.rb
index b09a848..dc52290 100644
--- a/spec/models/post_timing_spec.rb
+++ b/spec/models/post_timing_spec.rb
@@ -162,69 +162,6 @@ describe PostTiming do
 
     end
 
-    describe 'avg times' do
-
-      describe 'posts' do
-        it 'has no avg_time by default' do
-          expect(@post.avg_time).to be_blank
-        end
-
-        it "doesn't change when we calculate the avg time for the post because there's no timings" do
-          Post.calculate_avg_time
-          @post.reload
-          expect(@post.avg_time).to be_blank
-        end
-      end
-
-      describe 'topics' do
-        it 'has no avg_time by default' do
-          expect(@topic.avg_time).to be_blank
-        end
-
-        it "doesn't change when we calculate the avg time for the post because there's no timings" do
-          Topic.calculate_avg_time
-          @topic.reload
-          expect(@topic.avg_time).to be_blank
-        end
-      end
-
-      describe "it doesn't create an avg time for the same user" do
-        it 'something' do
-          PostTiming.record_timing(@timing_attrs.merge(user_id: @post.user_id))
-          Post.calculate_avg_time
-          @post.reload
-          expect(@post.avg_time).to be_blank
-        end
-
-      end
-
-      describe 'with a timing for another user' do
-        before do
-          PostTiming.record_timing(@timing_attrs)
-          Post.calculate_avg_time
-          @post.reload
-        end
-
-        it 'has a post avg_time from the timing' do
-          expect(@post.avg_time).to be_present
-        end
-
-        describe 'forum topics' do
-          before do
-            Topic.calculate_avg_time
-            @topic.reload
-          end
-
-          it 'has an avg_time from the timing' do
-            expect(@topic.avg_time).to be_present
-          end
-
-        end
-
-      end
-
-    end
-
   end
 
 end
diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb
index 0180d94..34ca73a 100644
--- a/spec/models/topic_spec.rb
+++ b/spec/models/topic_spec.rb
@@ -2030,13 +2030,6 @@ describe Topic do
 
   end
 
-  describe "calculate_avg_time" do
-    it "does not explode" do
-      Topic.calculate_avg_time
-      Topic.calculate_avg_time(1.day.ago)
-    end
-  end
-
   describe "expandable_first_post?" do
 
     let(:topic) { Fabricate.build(:topic) }
diff --git a/test/javascripts/fixtures/post.js.es6 b/test/javascripts/fixtures/post.js.es6
index 28288e4..736f437 100644
--- a/test/javascripts/fixtures/post.js.es6
+++ b/test/javascripts/fixtures/post.js.es6
@@ -15,7 +15,6 @@ export default {
     reply_count: 1,
     reply_to_post_number: null,
     quote_count: 0,
-    avg_time: 25,
     incoming_link_count: 314,
     reads: 475,
     score: 1702.25,
@@ -68,7 +67,6 @@ export default {
     reply_count: 0,
     reply_to_post_number: null,
     quote_count: 0,

[... diff too long, it was truncated ...]

GitHub sha: f8eddd40

1 Like

This commit has been mentioned on Discourse Meta. There might be relevant details there: