FEATURE: Onebox can match engines based on the content_type (#13876)

FEATURE: Onebox can match engines based on the content_type (#13876)

  • FEATURE: Onebox can match engines based on the content_type

FinalDestination now returns the content_type of a resolved URL.

Oneboxer passes this value to Onebox itself. Onebox engines can now specify a matches_content_type regex of content_types that the engine can handle, regardless of the URL.

ImageOnebox will match URLs with a content type of image/png, jpg, gif, bmp, tif, etc.

This will allow images that exist at a URL without a file type extension to be correctly rendered, assuming a valid content_type is returned.

diff --git a/lib/final_destination.rb b/lib/final_destination.rb
index d2a8b03..3831abf 100644
--- a/lib/final_destination.rb
+++ b/lib/final_destination.rb
@@ -30,7 +30,7 @@ class FinalDestination
 
   DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"
 
-  attr_reader :status, :cookie, :status_code, :ignored
+  attr_reader :status, :cookie, :status_code, :content_type, :ignored
 
   def initialize(url, opts = nil)
     @url = url
@@ -215,6 +215,7 @@ class FinalDestination
         end
       end
 
+      @content_type = response.headers['Content-Type'] if response.headers.has_key?('Content-Type')
       @status = :resolved
       return @uri
     when 400, 405, 406, 409, 500, 501
diff --git a/lib/onebox/engine.rb b/lib/onebox/engine.rb
index f1b9196..8699cdd 100644
--- a/lib/onebox/engine.rb
+++ b/lib/onebox/engine.rb
@@ -96,6 +96,12 @@ module Onebox
     end
 
     module ClassMethods
+      def handles_content_type?(other)
+        if other && class_variable_defined?(:@@matcher_content_type)
+          !!(other.to_s =~ class_variable_get(:@@matcher_content_type))
+        end
+      end
+
       def ===(other)
         if other.kind_of?(URI)
           !!(other.to_s =~ class_variable_get(:@@matcher))
@@ -112,6 +118,10 @@ module Onebox
         class_variable_set :@@matcher, r
       end
 
+      def matches_content_type(ct)
+        class_variable_set :@@matcher_content_type, ct
+      end
+
       def requires_iframe_origins(*origins)
         class_variable_set :@@iframe_origins, origins
       end
diff --git a/lib/onebox/engine/image_onebox.rb b/lib/onebox/engine/image_onebox.rb
index 91d64f6..d37faff 100644
--- a/lib/onebox/engine/image_onebox.rb
+++ b/lib/onebox/engine/image_onebox.rb
@@ -5,6 +5,7 @@ module Onebox
     class ImageOnebox
       include Engine
 
+      matches_content_type(/^image\/(png|jpg|jpeg|gif|bmp|tif|tiff)$/)
       matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
 
       def always_https?
diff --git a/lib/onebox/matcher.rb b/lib/onebox/matcher.rb
index 2e33790..1bfbc7a 100644
--- a/lib/onebox/matcher.rb
+++ b/lib/onebox/matcher.rb
@@ -21,7 +21,13 @@ module Onebox
       return if @uri.nil?
       return if @uri.port && !Onebox.options.allowed_ports.include?(@uri.port)
       return if @uri.scheme && !Onebox.options.allowed_schemes.include?(@uri.scheme)
-      ordered_engines.find { |engine| engine === @uri && has_allowed_iframe_origins?(engine) }
+
+      ordered_engines.find do |engine|
+        (
+          engine.respond_to?(:handles_content_type?) && engine.handles_content_type?(@options[:content_type]) ||
+          engine === @uri
+        ) && has_allowed_iframe_origins?(engine)
+      end
     end
 
     def has_allowed_iframe_origins?(engine)
diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb
index 845fc2c..6c194fb 100644
--- a/lib/oneboxer.rb
+++ b/lib/oneboxer.rb
@@ -429,7 +429,8 @@ module Oneboxer
         hostname: GlobalSetting.hostname,
         facebook_app_access_token: SiteSetting.facebook_app_access_token,
         disable_media_download_controls: SiteSetting.disable_onebox_media_download_controls,
-        body_cacher: self
+        body_cacher: self,
+        content_type: fd.content_type
       }
 
       onebox_options[:cookie] = fd.cookie if fd.cookie
diff --git a/spec/components/final_destination_spec.rb b/spec/components/final_destination_spec.rb
index 678423e..8c626eb 100644
--- a/spec/components/final_destination_spec.rb
+++ b/spec/components/final_destination_spec.rb
@@ -42,6 +42,13 @@ describe FinalDestination do
     }
   end
 
+  let(:image_response) do
+    {
+      status: 200,
+      headers: { "Content-Type" => "image/jpeg" }
+    }
+  end
+
   def redirect_response(from, dest)
     stub_request(:head, from).to_return(
       status: 302,
@@ -317,6 +324,19 @@ describe FinalDestination do
       expect(final.resolve.to_s).to eq("#{upstream_url}#L154-L205")
       expect(final.status).to eq(:resolved)
     end
+
+    context "content_type" do
+      before do
+        stub_request(:head, "https://eviltrout.com/this/is/an/image").to_return(image_response)
+      end
+
+      it "returns a content_type" do
+        final = FinalDestination.new("https://eviltrout.com/this/is/an/image", opts)
+        expect(final.resolve.to_s).to eq("https://eviltrout.com/this/is/an/image")
+        expect(final.content_type).to eq("image/jpeg")
+        expect(final.status).to eq(:resolved)
+      end
+    end
   end
 
   describe '.get' do
diff --git a/spec/lib/onebox/engine/image_onebox_spec.rb b/spec/lib/onebox/engine/image_onebox_spec.rb
index 7cf0c75..15f0657 100644
--- a/spec/lib/onebox/engine/image_onebox_spec.rb
+++ b/spec/lib/onebox/engine/image_onebox_spec.rb
@@ -38,4 +38,8 @@ describe Onebox::Engine::ImageOnebox do
   it "includes a direct link to the image" do
     expect(Onebox.preview('http://www.discourse.org/images/logo.png').to_s).to match(/<a.*png/)
   end
+
+  it "matches on content_type" do
+    expect(Onebox.preview('http://www.discourse.org/images/logo', { content_type: 'image/png' }).to_s).to match(/<img/)
+  end
 end
diff --git a/spec/lib/onebox/engine_spec.rb b/spec/lib/onebox/engine_spec.rb
index 5da32ac..f5c3981 100644
--- a/spec/lib/onebox/engine_spec.rb
+++ b/spec/lib/onebox/engine_spec.rb
@@ -50,6 +50,18 @@ describe Onebox::Engine do
     end
   end
 
+  describe "handles_content_type?" do
+    class OneboxEngineImages
+      include Onebox::Engine
+      @@matcher_content_type = /^image\/png$/
+    end
+
+    it "returns true if argument matches the matcher" do
+      result = OneboxEngineImages.handles_content_type?('image/png')
+      expect(result).to eq(true)
+    end
+  end
+
   class AlwaysHttpsEngineExample < OneboxEngineExample
     always_https
   end
diff --git a/spec/lib/onebox/preview_spec.rb b/spec/lib/onebox/preview_spec.rb
index 6d25ffb..bf9cc07 100644
--- a/spec/lib/onebox/preview_spec.rb
+++ b/spec/lib/onebox/preview_spec.rb
@@ -59,9 +59,16 @@ describe Onebox::Preview do
   end
 
   describe "#engine" do
+    let(:preview_image_url) { "http://www.example.com/image/without/file_extension" }
+    let(:preview_image) { described_class.new(preview_image_url, content_type: 'image/png') }
+
     it "returns an engine" do
       expect(preview.send(:engine)).to be_an(Onebox::Engine)
     end
+
+    it "can match based on content_type" do
+      expect(preview_image.send(:engine)).to be_an(Onebox::Engine::ImageOnebox)
+    end
   end
 
   describe "xss" do

GitHub sha: 2f28ba318c3f650b1a0a76530021bc21bc8ab6f6

This commit appears in #13876 which was approved by eviltrout. It was merged by jbrw.