PERF: Keep track of when a users first unread is

PERF: Keep track of when a users first unread is

This optimisation avoids large scans joining the topics table with the topic_users table.

Previously when a user carried a lot of read state we would have to join the entire read state with the topics table. This operation would slow down home page and every topic page. The more read state you accumulated the larger the impact.

The optimisation helps people who clean up unread, however if you carry unread from years ago it will only have minimal impact.

diff --git a/app/models/post_timing.rb b/app/models/post_timing.rb
index 82f7a02..0de539a 100644
--- a/app/models/post_timing.rb
+++ b/app/models/post_timing.rb
@@ -80,6 +80,10 @@ class PostTiming < ActiveRecord::Base
         highest_seen_post_number: last_read,
         last_read_post_number: last_read
       )
+
+      if !topic.private_message?
+        set_minimum_first_unread!(user_id: user.id, date: topic.updated_at)
+      end
     end
   end
 
@@ -92,9 +96,24 @@ class PostTiming < ActiveRecord::Base
       TopicUser
         .where('user_id = ? and topic_id in (?)', user_id, topic_ids)
         .delete_all
+
+      date = Topic.listable_topics.where(id: topic_ids).minimum(:updated_at)
+
+      if date
+        set_minimum_first_unread!(user_id: user_id, date: date)
+      end
     end
   end
 
+  def self.set_minimum_first_unread!(user_id:, date:)
+    DB.exec(<<~SQL, date: date, user_id: user_id)
+      UPDATE user_stats
+      SET first_unread_at = :date
+      WHERE first_unread_at > :date AND
+            user_id = :user_id
+    SQL
+  end
+
   MAX_READ_TIME_PER_BATCH = 60 * 1000.0
 
   def self.process_timings(current_user, topic_id, topic_time, timings, opts = {})
diff --git a/app/models/topic_tracking_state.rb b/app/models/topic_tracking_state.rb
index 27965ec..d56dda7 100644
--- a/app/models/topic_tracking_state.rb
+++ b/app/models/topic_tracking_state.rb
@@ -172,7 +172,7 @@ class TopicTrackingState
     #
     sql = report_raw_sql(topic_id: topic_id, skip_unread: true, skip_order: true, staff: user.staff?)
     sql << "\nUNION ALL\n\n"
-    sql << report_raw_sql(topic_id: topic_id, skip_new: true, skip_order: true, staff: user.staff?)
+    sql << report_raw_sql(topic_id: topic_id, skip_new: true, skip_order: true, staff: user.staff?, filter_old_unread: true)
 
     DB.query(
       sql,
@@ -195,6 +195,13 @@ class TopicTrackingState
           .gsub("-999", ":user_id")
       end
 
+    filter_old_unread =
+      if opts && opts[:filter_old_unread]
+        " topics.updated_at >= us.first_unread_at AND "
+      else
+        ""
+      end
+
     new =
       if opts && opts[:skip_new]
         "1=0"
@@ -221,6 +228,7 @@ class TopicTrackingState
     JOIN categories c ON c.id = topics.category_id
     LEFT JOIN topic_users tu ON tu.topic_id = topics.id AND tu.user_id = u.id
     WHERE u.id = :user_id AND
+          #{filter_old_unread}
           topics.archetype <> 'private_message' AND
           ((#{unread}) OR (#{new})) AND
           (topics.visible OR u.admin OR u.moderator) AND
diff --git a/app/models/user_stat.rb b/app/models/user_stat.rb
index c4806af..48940f9 100644
--- a/app/models/user_stat.rb
+++ b/app/models/user_stat.rb
@@ -7,6 +7,77 @@ class UserStat < ActiveRecord::Base
   def self.ensure_consistency!(last_seen = 1.hour.ago)
     reset_bounce_scores
     update_view_counts(last_seen)
+    update_first_unread(last_seen)
+  end
+
+  def self.update_first_unread(last_seen, limit: 10_000)
+    DB.exec(<<~SQL, min_date: last_seen, limit: limit)
+      UPDATE user_stats us
+      SET first_unread_at = Y.min_date
+      FROM (
+        SELECT u1.id user_id,
+           X.min min_date
+        FROM users u1
+        LEFT JOIN
+          (SELECT u.id AS user_id,
+                  min(topics.updated_at) min
+           FROM users u
+           LEFT JOIN topic_users tu ON tu.user_id = u.id
+           LEFT JOIN topics ON tu.topic_id = topics.id
+           JOIN user_stats AS us ON us.user_id = u.id
+           JOIN user_options AS uo ON uo.user_id = u.id
+           JOIN categories c ON c.id = topics.category_id
+           WHERE u.id IN (
+               SELECT id
+               FROM users
+               WHERE last_seen_at IS NOT NULL
+                AND last_seen_at > :min_date
+                ORDER BY last_seen_at DESC
+                LIMIT :limit
+              )
+             AND topics.archetype <> 'private_message'
+             AND (("topics"."deleted_at" IS NULL
+                   AND tu.last_read_post_number < CASE
+                                                      WHEN u.admin
+                                                           OR u.moderator THEN topics.highest_staff_post_number
+                                                      ELSE topics.highest_post_number
+                                                  END
+                   AND COALESCE(tu.notification_level, 1) >= 2)
+                  OR (1=0))
+             AND (topics.visible
+                  OR u.admin
+                  OR u.moderator)
+             AND topics.deleted_at IS NULL
+             AND (NOT c.read_restricted
+                  OR u.admin
+                  OR category_id IN
+                    (SELECT c2.id
+                     FROM categories c2
+                     JOIN category_groups cg ON cg.category_id = c2.id
+                     JOIN group_users gu ON gu.user_id = u.id
+                     AND cg.group_id = gu.group_id
+                     WHERE c2.read_restricted ))
+             AND NOT EXISTS
+               (SELECT 1
+                FROM category_users cu
+                WHERE last_read_post_number IS NULL
+                  AND cu.user_id = u.id
+                  AND cu.category_id = topics.category_id
+                  AND cu.notification_level = 0)
+           GROUP BY u.id,
+                    u.username) AS X ON X.user_id = u1.id
+        WHERE u1.id IN
+            (
+             SELECT id
+             FROM users
+             WHERE last_seen_at IS NOT NULL
+              AND last_seen_at > :min_date
+              ORDER BY last_seen_at DESC
+              LIMIT :limit
+            )
+      ) Y
+      WHERE Y.user_id = us.user_id
+    SQL
   end
 
   def self.reset_bounce_scores
@@ -133,4 +204,5 @@ end
 #  flags_agreed             :integer          default(0), not null
 #  flags_disagreed          :integer          default(0), not null
 #  flags_ignored            :integer          default(0), not null
+#  first_unread_at          :datetime
 #
diff --git a/db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb b/db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb
new file mode 100644
index 0000000..e89b93a
--- /dev/null
+++ b/db/migrate/20190402024053_add_first_unread_at_to_user_stats.rb
@@ -0,0 +1,30 @@
+class AddFirstUnreadAtToUserStats < ActiveRecord::Migration[5.2]
+  disable_ddl_transaction!
+
+  def up
+    # so we can rerun this if the index creation fails out of ddl
+    if !column_exists?(:user_stats, :first_unread_at)
+      add_column :user_stats, :first_unread_at, :datetime, null: false, default: -> { 'CURRENT_TIMESTAMP' }
+    end
+
+    execute <<~SQL
+      UPDATE user_stats us
+      SET first_unread_at = u.created_at
+      FROM users u
+      WHERE u.id = us.user_id
+    SQL
+
+    # this is quite a big index to carry, but we need it to optimise home page initial load
+    # by covering all these columns we are able to quickly retrieve the set of topics that were
+    # updated in the last N days. We perform a ranged lookup and selectivity may vary a lot
+    add_index :topics,
+      [:updated_at, :visible, :highest_staff_post_number, :highest_post_number, :category_id, :created_at, :id],
+      algorithm: :concurrently,
+      name: 'index_topics_on_updated_at_public',
+      where: "(topics.archetype <> 'private_message') AND (topics.deleted_at IS NULL)"
+  end
+
+  def down
+    raise ActiveRecord::IrreversibleMigration
+  end
+end
diff --git a/lib/topic_query.rb b/lib/topic_query.rb
index e1934f8..9fb9496 100644
--- a/lib/topic_query.rb
+++ b/lib/topic_query.rb
@@ -469,6 +469,10 @@ class TopicQuery
         staff: @user&.staff?)
       .order('CASE WHEN topics.user_id = tu.user_id THEN 1 ELSE 2 END')
 
+    if @user
+      result = result.where("topics.updated_at >= (SELECT first_unread_at FROM user_stats WHERE user_id = ?)", @user.id)
+    end
+
     self.class.results_filter_callbacks.each do |filter_callback|
       result = filter_callback.call(:unread, result, @user, options)

[... diff too long, it was truncated ...]

GitHub sha: 5f896ae8

1 Like