FEATURE: Body cacher (#460)

FEATURE: Body cacher (#460)

Optionally pass a simple cache provider as the `body_cacher` option to enable caching and fetching of the bodies of successful HTTP GET requests.

The caching mechanism you provide should support methods called:

`fetch_cached_response_body` and `cached_response_body_exists?`

These methods expect a single argument of a URL string.

cache_response_body

Expects two arguments: a URL string and the response body string.

diff --git a/lib/onebox/engine/gfycat_onebox.rb b/lib/onebox/engine/gfycat_onebox.rb
index 702ab1b..10973c3 100644
--- a/lib/onebox/engine/gfycat_onebox.rb
+++ b/lib/onebox/engine/gfycat_onebox.rb
@@ -63,7 +63,7 @@ module Onebox
 
       def nokogiri_page
         @nokogiri_page ||= begin
-          response = Onebox::Helpers.fetch_response(url, 10) rescue nil
+          response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
           Nokogiri::HTML(response)
         end
       end
diff --git a/lib/onebox/engine/google_docs_onebox.rb b/lib/onebox/engine/google_docs_onebox.rb
index b3ca4dc..49bfb8d 100644
--- a/lib/onebox/engine/google_docs_onebox.rb
+++ b/lib/onebox/engine/google_docs_onebox.rb
@@ -47,7 +47,7 @@ module Onebox
       end
 
       def get_og_data
-        response = Onebox::Helpers.fetch_response(url, 10) rescue nil
+        response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
         html = Nokogiri::HTML(response)
         og_data = {}
         html.css('meta').each do |m|
diff --git a/lib/onebox/engine/html.rb b/lib/onebox/engine/html.rb
index 9b99465..f11c178 100644
--- a/lib/onebox/engine/html.rb
+++ b/lib/onebox/engine/html.rb
@@ -11,7 +11,8 @@ module Onebox
       end
 
       def raw
-        @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
+        body_cacher = self.options[:body_cacher] if self.options
+        @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
       end
 
       def html?
diff --git a/lib/onebox/engine/pastebin_onebox.rb b/lib/onebox/engine/pastebin_onebox.rb
index 2efb21a..2e119dc 100644
--- a/lib/onebox/engine/pastebin_onebox.rb
+++ b/lib/onebox/engine/pastebin_onebox.rb
@@ -31,7 +31,7 @@ module Onebox
 
       def lines
         return @lines if @lines
-        response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
+        response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
         @lines = response.split("\n")
       end
 
diff --git a/lib/onebox/engine/twitter_status_onebox.rb b/lib/onebox/engine/twitter_status_onebox.rb
index bb0c9ff..31fffad 100644
--- a/lib/onebox/engine/twitter_status_onebox.rb
+++ b/lib/onebox/engine/twitter_status_onebox.rb
@@ -17,7 +17,7 @@ module Onebox
       private
 
       def get_twitter_data
-        response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
+        response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
         html = Nokogiri::HTML(response)
         twitter_data = {}
         html.css('meta').each do |m|
diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb
index a9e7d9a..10db24c 100644
--- a/lib/onebox/helpers.rb
+++ b/lib/onebox/helpers.rb
@@ -24,8 +24,8 @@ module Onebox
       html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
     end
 
-    def self.fetch_html_doc(url, headers = nil)
-      response = (fetch_response(url, nil, nil, headers) rescue nil)
+    def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
+      response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
       doc = Nokogiri::HTML(response)
       uri = Addressable::URI.parse(url)
 
@@ -37,7 +37,7 @@ module Onebox
         canonical_link = doc.at('//link[@rel="canonical"]/@href')
         canonical_uri = Addressable::URI.parse(canonical_link)
         if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
-          response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
+          response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
           doc = Nokogiri::HTML(response) if response
         end
       end
@@ -45,16 +45,23 @@ module Onebox
       doc
     end
 
-    def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
+    def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
+      redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit
 
-      limit ||= 5
-      limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit
-
-      raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
+      raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0
 
       uri = Addressable::URI.parse(location)
       uri = Addressable::URI.join(domain, uri) if !uri.host
 
+      use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
+      if use_body_cacher
+        response_body = body_cacher.fetch_cached_response_body(uri.to_s)
+
+        if response_body.present?
+          return response_body
+        end
+      end
+
       result = StringIO.new
       Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
         http.open_timeout = Onebox.options.connect_timeout
@@ -86,9 +93,9 @@ module Onebox
             response.error! unless [301, 302].include?(code)
             return fetch_response(
               response['location'],
-              limit - 1,
-              "#{uri.scheme}://#{uri.host}",
-              redir_header
+              redirect_limit: redirect_limit - 1,
+              domain: "#{uri.scheme}://#{uri.host}",
+              headers: redir_header
             )
           end
 
@@ -98,6 +105,10 @@ module Onebox
             raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
           end
 
+          if use_body_cacher && body_cacher.cache_response_body?(uri)
+            body_cacher.cache_response_body(uri.to_s, result.string)
+          end
+
           return result.string
         end
       end

GitHub sha: ca7ea32a

This commit appears in #460 which was approved by ZogStriP. It was merged by jbrw.