FEATURE: Additional control of iframes in oneboxes (#10523)

FEATURE: Additional control of iframes in oneboxes (#10523)

This commit adds a new site setting “allowed_onebox_iframes”. By default, all onebox iframes are allowed. When the list of domains is restricted, Onebox will automatically skip engines which require those domains, and use a fallback engine.

diff --git a/Gemfile.lock b/Gemfile.lock
index af5e172..af551ad 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -242,7 +242,7 @@ GEM
     omniauth-twitter (1.4.0)
       omniauth-oauth (~> 1.1)
       rack
-    onebox (2.0.2)
+    onebox (2.1.0)
       addressable (~> 2.7.0)
       htmlentities (~> 4.3)
       multi_json (~> 1.11)
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 26522d3..32ded04 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -1553,6 +1553,7 @@ en:
     use_admin_ip_allowlist: "Admins can only log in if they are at an IP address defined in the Screened IPs list (Admin > Logs > Screened Ips)."
     blocked_ip_blocks: "A list of private IP blocks that should never be crawled by Discourse"
     allowed_internal_hosts: "A list of internal hosts that discourse can safely crawl for oneboxing and other purposes"
+    allowed_onebox_iframes: "A list of iframe src domains which are allowed via Onebox embeds. `*` will allow all default Onebox engines."
     allowed_iframes: "A list of iframe src domain prefixes that discourse can safely allow in posts"
     allowed_crawler_user_agents: "User agents of web crawlers that should be allowed to access the site. WARNING! SETTING THIS WILL DISALLOW ALL CRAWLERS NOT LISTED HERE!"
     blocked_crawler_user_agents: "Unique case insensitive word in the user agent string identifying web crawlers that should not be allowed to access the site. Does not apply if allowlist is defined."
diff --git a/config/site_settings.yml b/config/site_settings.yml
index 93cc9c5..6330dc0 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -1482,6 +1482,11 @@ security:
   allowed_internal_hosts:
     default: ""
     type: list
+  allowed_onebox_iframes:
+    default: "*"
+    type: list
+    allow_any: false
+    choices: "['*'] + Onebox::Engine.all_iframe_origins"
   allowed_iframes:
     default: "https://www.google.com/maps/embed?|https://www.openstreetmap.org/export/embed.html?|https://calendar.google.com/calendar/embed?|https://codepen.io/"
     type: list
diff --git a/lib/onebox/engine/allowlisted_generic_onebox.rb b/lib/onebox/engine/allowlisted_generic_onebox.rb
index 85af927..d13d917 100644
--- a/lib/onebox/engine/allowlisted_generic_onebox.rb
+++ b/lib/onebox/engine/allowlisted_generic_onebox.rb
@@ -16,24 +16,6 @@ module Onebox
         Float::INFINITY
       end
 
-      private
-
-      # overwrite to allowlist iframes
-      def is_embedded?
-        return false unless data[:html] && data[:height]
-        return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
-
-        if data[:html]["iframe"]
-          fragment = Nokogiri::HTML5::fragment(data[:html])
-          if iframe = fragment.at_css("iframe")
-            src = iframe["src"]
-            return src.present? && SiteSetting.allowed_iframes.split("|").any? { |url| src.start_with?(url) }
-          end
-        end
-
-        false
-      end
-
     end
   end
 end
diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb
index a5e1c23..cb7c694 100644
--- a/lib/oneboxer.rb
+++ b/lib/oneboxer.rb
@@ -138,7 +138,9 @@ module Oneboxer
   end
 
   def self.engine(url)
-    Onebox::Matcher.new(url).oneboxed
+    Onebox::Matcher.new(url, {
+      allowed_iframe_regexes: Onebox::Engine.origins_to_regexes(allowed_iframe_origins)
+    }).oneboxed
   end
 
   def self.recently_failed?(url)
@@ -300,6 +302,14 @@ module Oneboxer
     @preserve_fragment_url_hosts ||= ['http://github.com']
   end
 
+  def self.allowed_iframe_origins
+    allowed = SiteSetting.allowed_onebox_iframes.split("|")
+    if allowed.include?("*")
+      allowed = Onebox::Engine.all_iframe_origins
+    end
+    allowed += SiteSetting.allowed_iframes.split("|")
+  end
+
   def self.external_onebox(url)
     Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
       fd = FinalDestination.new(url,
@@ -314,6 +324,7 @@ module Oneboxer
       options = {
         max_width: 695,
         sanitize_config: Onebox::DiscourseOneboxSanitizeConfig::Config::DISCOURSE_ONEBOX,
+        allowed_iframe_origins: allowed_iframe_origins,
         hostname: GlobalSetting.hostname,
       }
 
diff --git a/spec/components/onebox/engine/allowlisted_generic_onebox_spec.rb b/spec/components/onebox/engine/allowlisted_generic_onebox_spec.rb
index 756ac1f..1e9ac4c 100644
--- a/spec/components/onebox/engine/allowlisted_generic_onebox_spec.rb
+++ b/spec/components/onebox/engine/allowlisted_generic_onebox_spec.rb
@@ -18,32 +18,4 @@ describe Onebox::Engine::AllowlistedGenericOnebox do
 
   end
 
-  it "allowlists iframes" do
-    allowlisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://allowlist.ed/iframes.json" />'
-    blocklisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://blocklist.ed/iframes.json" />'
-
-    allowlisted_oembed = {
-      type: "rich",
-      height: "100",
-      html: "<iframe src='https://ifram.es/foo/bar'></iframe>"
-    }
-
-    blocklisted_oembed = {
-      type: "rich",
-      height: "100",
-      html: "<iframe src='https://malicious/discourse.org/'></iframe>"
-    }
-
-    stub_request(:get, "https://blocklist.ed/iframes").to_return(status: 200, body: blocklisted_body)
-    stub_request(:get, "https://blocklist.ed/iframes.json").to_return(status: 200, body: blocklisted_oembed.to_json)
-
-    stub_request(:get, "https://allowlist.ed/iframes").to_return(status: 200, body: allowlisted_body)
-    stub_request(:get, "https://allowlist.ed/iframes.json").to_return(status: 200, body: allowlisted_oembed.to_json)
-
-    SiteSetting.allowed_iframes = "discourse.org|https://ifram.es"
-
-    expect(Onebox.preview("https://blocklist.ed/iframes").to_s).to be_empty
-    expect(Onebox.preview("https://allowlist.ed/iframes").to_s).to match("iframe src")
-  end
-
 end
diff --git a/spec/components/oneboxer_spec.rb b/spec/components/oneboxer_spec.rb
index 17843e2..12f430b 100644
--- a/spec/components/oneboxer_spec.rb
+++ b/spec/components/oneboxer_spec.rb
@@ -183,4 +183,69 @@ describe Oneboxer do
 
     expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to be_present
   end
+
+  context "with youtube stub" do
+    let(:html) do
+      <<~HTML
+        <html>
+        <head>
+          <meta property="og:title" content="Onebox1">
+          <meta property="og:description" content="this is bodycontent">
+        </head>
+        <body>
+           <p>body</p>
+        </body>
+        <html>
+      HTML
+    end
+
+    before do
+      stub_request(:any, "https://www.youtube.com/watch?v=dQw4w9WgXcQ").to_return(status: 200, body: html)
+    end
+
+    it "allows restricting engines based on the allowed_onebox_iframes setting" do
+      output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
+      expect(output).to include("<iframe") # Regular youtube onebox
+
+      # Disable all onebox iframes:
+      SiteSetting.allowed_onebox_iframes = ""
+      output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
+      expect(output).not_to include("<iframe") # Generic onebox
+      expect(output).to include("allowlistedgeneric")
+
+      # Just enable youtube:
+      SiteSetting.allowed_onebox_iframes = "https://www.youtube.com"
+      output = Oneboxer.onebox("https://www.youtube.com/watch?v=dQw4w9WgXcQ", invalidate_oneboxes: true)
+      expect(output).to include("<iframe") # Regular youtube onebox
+    end
+  end
+
+  it "allows iframes from generic sites via the allowed_iframes setting" do
+    allowlisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://allowlist.ed/iframes.json" />'
+    blocklisted_body = '<html><head><link rel="alternate" type="application/json+oembed" href="https://blocklist.ed/iframes.json" />'
+
+    allowlisted_oembed = {
+      type: "rich",
+      height: "100",

[... diff too long, it was truncated ...]

GitHub sha: a3577435

This commit appears in #10523 which was merged by davidtaylorhq.