FIX: IMDb links were being oneboxed as posters (#13310)

FIX: IMDb links were being oneboxed as posters (#13310)

IMDb movie links were being rendered as posters. This happened because IMDb intermittently sends og:type as image. To fix this, we’ll now default all IMDb links to the article type. This ensures that the IMDb onebox includes all the information instead of showing just a poster without any context.

diff --git a/lib/onebox/engine/allowlisted_generic_onebox.rb b/lib/onebox/engine/allowlisted_generic_onebox.rb
index e264abe..0ccc8eb 100644
--- a/lib/onebox/engine/allowlisted_generic_onebox.rb
+++ b/lib/onebox/engine/allowlisted_generic_onebox.rb
@@ -42,6 +42,10 @@ module Onebox
         %w(slideshare.net dailymotion.com livestream.com imgur.com flickr.com)
       end
 
+      def self.article_html_hosts
+        %w(imdb.com)
+      end
+
       def self.host_matches(uri, list)
         !!list.find { |h| %r((^|\.)#{Regexp.escape(h)}$).match(uri.host) }
       end
@@ -59,7 +63,7 @@ module Onebox
       end
 
       def placeholder_html
-        return article_html if is_article?
+        return article_html if (is_article? || force_article_html?)
         return image_html if is_image?
         return Onebox::Helpers.video_placeholder_html if is_video? || is_card?
         return Onebox::Helpers.generic_placeholder_html if is_embedded?
@@ -150,7 +154,7 @@ module Onebox
       end
 
       def generic_html
-        return article_html  if is_article?
+        return article_html  if (is_article? || force_article_html?)
         return video_html    if is_video?
         return image_html    if is_image?
         return embedded_html if is_embedded?
@@ -207,6 +211,10 @@ module Onebox
         options[:allowed_iframe_regexes]&.any? { |r| src =~ r }
       end
 
+      def force_article_html?
+        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.article_html_hosts) && (has_text? || is_image_article?)
+      end
+
       def card_html
         escaped_url = ::Onebox::Helpers.normalize_url_for_output(data[:player])
 
diff --git a/spec/fixtures/onebox/imdb.response b/spec/fixtures/onebox/imdb.response
new file mode 100644
index 0000000..ac6bb7a
--- /dev/null
+++ b/spec/fixtures/onebox/imdb.response
@@ -0,0 +1,2186 @@
+<!DOCTYPE html><html lang="en-US" xmlns:og="http://opengraphprotocol.org/schema/" xmlns:fb="http://www.facebook.com/2008/fbml"><head><script>
+var ue_t0=ue_t0||+new Date();
+
+window.ue_ihb = (window.ue_ihb || window.ueinit || 0) + 1;
+if (window.ue_ihb === 1) {
+
+var ue_csm = window,
+    ue_hob = +new Date();
+(function(d){var e=d.ue=d.ue||{},f=Date.now||function(){return+new Date};e.d=function(b){return f()-(b?0:d.ue_t0)};e.stub=function(b,a){if(!b[a]){var c=[];b[a]=function(){c.push([c.slice.call(arguments),e.d(),d.ue_id])};b[a].replay=function(b){for(var a;a=c.shift();)b(a[0],a[1],a[2])};b[a].isStub=1}};e.exec=function(b,a){return function(){try{return b.apply(this,arguments)}catch(c){ueLogError(c,{attribution:a||"undefined",logLevel:"WARN"})}}}})(ue_csm);
+
+
+    var ue_err_chan = 'jserr-rw';
+(function(d,e){function h(f,b){if(!(a.ec>a.mxe)&&f){a.ter.push(f);b=b||{};var c=f.logLevel||b.logLevel;c&&c!==k&&c!==m&&c!==n&&c!==p||a.ec++;c&&c!=k||a.ecf++;b.pageURL=""+(e.location?e.location.href:"");b.logLevel=c;b.attribution=f.attribution||b.attribution;a.erl.push({ex:f,info:b})}}function l(a,b,c,e,g){d.ueLogError({m:a,f:b,l:c,c:""+e,err:g,fromOnError:1,args:arguments},g?{attribution:g.attribution,logLevel:g.logLevel}:void 0);return!1}var k="FATAL",m="ERROR",n="WARN",p="DOWNGRADED",a={ec:0,ecf:0,
+pec:0,ts:0,erl:[],ter:[],mxe:50,startTimer:function(){a.ts++;setInterval(function(){d.ue&&a.pec<a.ec&&d.uex("at");a.pec=a.ec},1E4)}};l.skipTrace=1;h.skipTrace=1;h.isStub=1;d.ueLogError=h;d.ue_err=a;e.onerror=l})(ue_csm,window);
+
+
+var ue_id = 'NQJ5J5X6JSJGPGSWNAKB',
+    ue_url,
+    ue_navtiming = 1,
+    ue_mid = 'A1EVAM02EL8SFB',
+    ue_sid = '141-4880477-5661841',
+    ue_sn = 'www.imdb.com',
+    ue_furl = 'fls-na.amazon.com',
+    ue_surl = 'https://unagi-na.amazon.com/1/events/com.amazon.csm.nexusclient.prod',
+    ue_int = 0,
+    ue_fcsn = 1,
+    ue_urt = 3,
+    ue_rpl_ns = 'cel-rpl',
+    ue_ddq = 1,
+    ue_fpf = '//fls-na.amazon.com/1/batch/1/OP/A1EVAM02EL8SFB:141-4880477-5661841:NQJ5J5X6JSJGPGSWNAKB$uedata=s:',
+    ue_sbuimp = 1,
+    ue_ibft = 0,
+    ue_fnt = 0,
+
+    ue_swi = 1;
+var ue_viz=function(){(function(b,e,a){function k(c){if(b.ue.viz.length<p&&!l){var a=c.type;c=c.originalEvent;/^focus./.test(a)&&c&&(c.toElement||c.fromElement||c.relatedTarget)||(a=e[m]||("blur"==a||"focusout"==a?"hidden":"visible"),b.ue.viz.push(a+":"+(+new Date-b.ue.t0)),"visible"==a&&(b.ue.isl&&q("at"),l=1))}}for(var l=0,q=b.uex,f,g,m,n=["","webkit","o","ms","moz"],d=0,p=20,h=0;h<n.length&&!d;h++)if(a=n[h],f=(a?a+"H":"h")+"idden",d="boolean"==typeof e[f])g=a+"visibilitychange",m=(a?a+"V":"v")+
+"isibilityState";k({});d&&e.addEventListener(g,k,0);b.ue&&d&&(b.ue.pageViz={event:g,propHid:f})})(ue_csm,ue_csm.document,ue_csm.window)};
+
+(function(d,k,K){function G(a){return a&&a.replace&&a.replace(/^\s+|\s+$/g,"")}function q(a){return"undefined"===typeof a}function C(a,b){for(var c in b)b[t](c)&&(a[c]=b[c])}function L(a){try{var b=K.cookie.match(RegExp("(^| )"+a+"=([^;]+)"));if(b)return b[2].trim()}catch(c){}}function M(n,b,c){var e=(x||{}).type;if("device"!==c||2!==e&&1!==e)n&&(d.ue_id=a.id=a.rid=n,y=y.replace(/((.*?:){2})(\w+)/,function(a,b){return b+n})),b&&(y=y.replace(/(.*?:)(\w|-)+/,function(a,c){return c+b}),d.ue_sid=b),c&&
+a.tag("page-source:"+c),d.ue_fpf=y}function O(){var a={};return function(b){b&&(a[b]=1);b=[];for(var c in a)a[t](c)&&b.push(c);return b}}function u(d,b,c,e){if(0<v&&0<=(aa||[]).indexOf(d)&&!b){for(var g=z.now(),k=0;z.now()-g<v;)k++;a.tag("marker-delayed:"+d)}e=e||+new z;var w;if(b||q(c)){if(d)for(w in g=b?h("t",b)||h("t",b,{}):a.t,g[d]=e,c)c[t](w)&&h(w,b,c[w]);return e}}function h(d,b,c){var e=b&&b!=a.id?a.sc[b]:a;e||(e=a.sc[b]={});"id"===d&&c&&(P=1);return e[d]=c||e[d]}function Q(d,b,c,e,g){c="on"+
+c;var h=b[c];"function"===typeof h?d&&(a.h[d]=h):h=function(){};b[c]=function(a){g?(e(a),h(a)):(h(a),e(a))};b[c]&&(b[c].isUeh=1)}function R(n,b,c,e){function r(b,c){var d=[b],e=0,f={},g,k;c?(d.push("m=1"),f[c]=1):f=a.sc;for(k in f)if(f[t](k)){var r=h("wb",k),l=h("t",k)||{},p=h("t0",k)||a.t0,m;if(c||2==r){r=r?e++:"";d.push("sc"+r+"="+k);for(m in l)q(l[m])||null===l[m]||d.push(m+r+"="+(l[m]-p));d.push("t"+r+"="+l[n]);if(h("ctb",k)||h("wb",k))g=1}}!v&&g&&d.push("ctb=1");return d.join("&")}function N(b,
+c,f,e){if(b){var g=d.ue_err;d.ue_url&&!e&&b&&0<b.length&&(e=new Image,a.iel.push(e),e.src=b,a.count&&a.count("postbackImageSize",b.length));if(y){var h=k.encodeURIComponent;h&&b&&(e=new Image,b=""+d.ue_fpf+h(b)+":"+(+new z-d.ue_t0),a.iel.push(e),e.src=b)}else a.log&&(a.log(b,"uedata",{n:1}),a.ielf.push(b));g&&!g.ts&&g.startTimer();a.b&&(g=a.b,a.b="",N(g,c,f,1))}}function w(b){var c=x?x.type:D,d=2==c||a.isBFonMshop,c=c&&!d,e=a.bfini;P||(e&&1<e&&(b+="&bfform=1",c||(a.isBFT=e-1)),d&&(b+="&bfnt=1",a.isBFT=
+a.isBFT||1),a.ssw&&a.isBFT&&(a.isBFonMshop&&(a.isNRBF=0),q(a.isNRBF)&&(d=a.ssw(a.oid),d.e||q(d.val)||(a.isNRBF=1<d.val?0:1)),q(a.isNRBF)||(b+="&nrbf="+a.isNRBF)),a.isBFT&&!a.isNRBF&&(b+="&bft="+a.isBFT));return b}if(!a.paused&&(b||q(c))){for(var p in c)c[t](p)&&h(p,b,c[p]);a.isBFonMshop||u("pc",b,c);p=h("id",b)||a.id;var s=h("id2",b),f=a.url+"?"+n+"&v="+a.v+"&id="+p,v=h("ctb",b)||h("wb",b),A;v&&(f+="&ctb="+v);s&&(f+="&id2="+s);1<d.ueinit&&(f+="&ic="+d.ueinit);if(!("ld"!=n&&"ul"!=n||b&&b!=p)){if("ld"==
+n){try{k[H]&&k[H].isUeh&&(k[H]=null)}catch(F){}if(k.chrome)for(s=0;s<I.length;s++)S(E,I[s]);(s=K.ue_backdetect)&&s.ue_back&&s.ue_back.value++;d._uess&&(A=d._uess());a.isl=1}a._bf&&(f+="&bf="+a._bf());d.ue_navtiming&&g&&(h("ctb",p,"1"),a.isBFonMshop||u("tc",D,D,J));!B||a.isBFonMshop||T||(g&&C(a.t,{na_:g.navigationStart,ul_:g.unloadEventStart,_ul:g.unloadEventEnd,rd_:g.redirectStart,_rd:g.redirectEnd,fe_:g.fetchStart,lk_:g.domainLookupStart,_lk:g.domainLookupEnd,co_:g.connectStart,_co:g.connectEnd,

[... diff too long, it was truncated ...]

GitHub sha: 2e4f0767

This commit appears in #13310, which was approved by CvX. It was merged by techAPJ.

This commit has been mentioned on Discourse Meta. There might be relevant details there: