FEATURE: improve performance of anonymous cache

FEATURE: improve performance of anonymous cache

This commit introduces 2 features:

  1. DISCOURSE_COMPRESS_ANON_CACHE (true|false, default false): this allows you to optionally compress the anon cache body entries in Redis, can be useful for high load sites with Redis that lives on a separate server to the web servers

  2. DISCOURSE_ANON_CACHE_STORE_THRESHOLD (default 2), only pop entries into redis if we observe them more than N times. This avoids situations where a crawler can walk a big pile of topics and store them all in Redis never to be used. Our default anon cache time for topics is only 60 seconds. Anon cache is in place to avoid the “slashdot” effect where a single topic is hit by 100s of people in one minute.

diff --git a/Gemfile b/Gemfile
index 57fd0d4..1599efa 100644
--- a/Gemfile
+++ b/Gemfile
@@ -210,6 +210,7 @@ gem 'rubyzip', require: false
 gem 'sshkey', require: false
 
 gem 'rchardet', require: false
+gem 'lz4-ruby', require: false, platform: :mri
 
 if ENV["IMPORT"] == "1"
   gem 'mysql2'
diff --git a/Gemfile.lock b/Gemfile.lock
index aa52c66..64fd900 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -174,6 +174,7 @@ GEM
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
     lru_redux (1.1.0)
+    lz4-ruby (0.3.3)
     mail (2.7.1)
       mini_mime (>= 0.1.1)
     maxminddb (0.1.22)
@@ -467,6 +468,7 @@ DEPENDENCIES
   logstash-logger
   logster
   lru_redux
+  lz4-ruby
   mail
   maxminddb
   memory_profiler
diff --git a/config/discourse_defaults.conf b/config/discourse_defaults.conf
index b841af9..2b0bd31 100644
--- a/config/discourse_defaults.conf
+++ b/config/discourse_defaults.conf
@@ -266,3 +266,15 @@ enable_js_error_reporting = true
 # Having a high number here is very low risk. Regular jobs are limited in scope and scale.
 mini_scheduler_workers = 5
 
+# enable compression on anonymous cache redis entries
+# this slightly increases the cost of storing cache entries but can make it much
+# cheaper to retrieve cache entries when redis is stored on a different machine to the one
+# running the web
+compress_anon_cache = false
+
+# Only store entries in redis for anonymous cache if they are observed more than N times
+# for a specific key
+#
+# This ensures there are no pathological cases where we keep storing data in anonymous cache
+# never to use it. Set to 1 to store immediately; set to 0 to disable the anon cache
+anon_cache_store_threshold = 2
diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb
index c64ae6a..559605c 100644
--- a/lib/middleware/anonymous_cache.rb
+++ b/lib/middleware/anonymous_cache.rb
@@ -73,7 +73,7 @@ module Middleware
       end
 
       def cache_key
-        @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}"
+        @cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}#{GlobalSetting.compress_anon_cache}"
       end
 
       def theme_ids
@@ -86,6 +86,10 @@ module Middleware
         end
       end
 
+      def cache_key_count
+        @cache_key_count ||= "#{cache_key}_count"
+      end
+
       def cache_key_body
         @cache_key_body ||= "#{cache_key}_body"
       end
@@ -154,8 +158,26 @@ module Middleware
         !!(!has_auth_cookie? && get? && no_cache_bypass)
       end
 
+      def compress(val)
+        if val && GlobalSetting.compress_anon_cache
+          require "lz4-ruby" if !defined?(LZ4)
+          LZ4::compress(val)
+        else
+          val
+        end
+      end
+
+      def decompress(val)
+        if val && GlobalSetting.compress_anon_cache
+          require "lz4-ruby" if !defined?(LZ4)
+          LZ4::uncompress(val)
+        else
+          val
+        end
+      end
+
       def cached(env = {})
-        if body = $redis.get(cache_key_body)
+        if body = decompress($redis.get(cache_key_body))
           if other = $redis.get(cache_key_other)
             other = JSON.parse(other)
             if req_params = other[1].delete(ADP)
@@ -174,9 +196,27 @@ module Middleware
       #  that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks
       #  but coordinating this is tricky
       def cache(result, env = {})
+        return result if GlobalSetting.anon_cache_store_threshold == 0
+
         status, headers, response = result
 
         if status == 200 && cache_duration
+
+          if GlobalSetting.anon_cache_store_threshold > 1
+            count = $redis.eval(<<~REDIS, [cache_key_count], [cache_duration])
+              local current = redis.call("incr", KEYS[1])
+              redis.call("expire",KEYS[1],ARGV[1])
+              return current
+            REDIS
+
+            # technically lua will cast for us, but might as well be
+            # prudent here, hence the to_i
+            if count.to_i < GlobalSetting.anon_cache_store_threshold
+              headers["X-Discourse-Cached"] = "skip"
+              return [status, headers, response]
+            end
+          end
+
           headers_stripped = headers.dup.delete_if { |k, _| ["Set-Cookie", "X-MiniProfiler-Ids"].include? k }
           headers_stripped["X-Discourse-Cached"] = "true"
           parts = []
@@ -191,7 +231,7 @@ module Middleware
             }
           end
 
-          $redis.setex(cache_key_body,  cache_duration, parts.join)
+          $redis.setex(cache_key_body,  cache_duration, compress(parts.join))
           $redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json)
 
           headers["X-Discourse-Cached"] = "store"
diff --git a/spec/components/middleware/anonymous_cache_spec.rb b/spec/components/middleware/anonymous_cache_spec.rb
index 9ec7682..74fe7f3 100644
--- a/spec/components/middleware/anonymous_cache_spec.rb
+++ b/spec/components/middleware/anonymous_cache_spec.rb
@@ -129,6 +129,24 @@ describe Middleware::AnonymousCache::Helper do
       crawler.clear_cache
     end
 
+    before do
+      global_setting :anon_cache_store_threshold, 1
+    end
+
+    it "compresses body on demand" do
+      global_setting :compress_anon_cache, true
+
+      payload = "x" * 1000
+      helper.cache([200, { "HELLO" => "WORLD" }, [payload]])
+
+      helper = new_helper("ANON_CACHE_DURATION" => 10)
+      expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]])
+
+      # depends on the lz4 implementation, but let's assume it is stable unless we discover
+      # otherwise
+      expect($redis.get(helper.cache_key_body).length).to eq(16)
+    end
+
     it "handles brotli switching" do
       helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])
 
diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb
index ca0f613..394e260 100644
--- a/spec/components/middleware/request_tracker_spec.rb
+++ b/spec/components/middleware/request_tracker_spec.rb
@@ -285,7 +285,9 @@ describe Middleware::RequestTracker do
       }
 
       tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params))
+      expect(@data[:cache]).to eq("skip")
 
+      tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params))
       expect(@data[:cache]).to eq("store")
 
       tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60))

GitHub sha: ed00f353

1 Like