Remove custom support of WeChat mp articles (#428)

Remove custom support of WeChat mp articles (#428)

The current parser doesn’t support the latest WeChat mp article pages, but those pages now support the Open Graph protocol, so the custom engine is no longer needed.

diff --git a/lib/onebox/engine.rb b/lib/onebox/engine.rb
index 5cea541..9d42cf8 100644
--- a/lib/onebox/engine.rb
+++ b/lib/onebox/engine.rb
@@ -168,7 +168,6 @@ require_relative "engine/twitch_clips_onebox"
 require_relative "engine/twitch_stream_onebox"
 require_relative "engine/twitch_video_onebox"
 require_relative "engine/trello_onebox"
-require_relative "engine/wechat_mp_onebox"
 require_relative "engine/cloudapp_onebox"
 require_relative "engine/wistia_onebox"
 require_relative "engine/simplecast_onebox"
diff --git a/lib/onebox/engine/wechat_mp_onebox.rb b/lib/onebox/engine/wechat_mp_onebox.rb
deleted file mode 100644
index 0ac9ec5..0000000
--- a/lib/onebox/engine/wechat_mp_onebox.rb
+++ /dev/null
@@ -1,62 +0,0 @@
-# frozen_string_literal: true
-
-module Onebox
-  module Engine
-    class WechatMpOnebox
-      include Engine
-      include LayoutSupport
-      include HTML
-
-      always_https
-      matches_regexp(/^https?:\/\/mp\.weixin\.qq\.com\/s.*$/)
-
-      def tld
-        @tld || @@matcher.match(@url)["tld"]
-      end
-
-      def http_params
-        {
-          'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3',
-          'Accept-Encoding' => 'plain'
-        }
-      end
-
-      private
-
-      def extract_script_value(var_name)
-        if (script_elem = raw.css("script").select { |script| script.inner_text.include? "var #{var_name} = " }) && script_elem.any?
-          e = Nokogiri::HTML(script_elem[0].inner_text.match(/var\s+#{Regexp.quote(var_name)}\s+=\s+"(.*?)";/)[1])
-          CGI::unescapeHTML(e.text.scan(/(?:\\x([a-f0-9]{2}))|(.)/i).map { |x| x[0] ? [x[0].to_i(16)].pack('U') : x[1] }.join)
-        end
-      end
-
-      # TODO need to handle hotlink protection from wechat
-      def image
-        if banner_image = extract_script_value("msg_cdn_url")
-          return banner_image
-        end
-
-        if (main_image = raw.css("img").select { |img| not img['class'] }) && main_image.any?
-          attributes = main_image.first.attributes
-
-          return attributes["data-src"].to_s if attributes["data-src"]
-        end
-      end
-
-      def data
-        title = CGI.unescapeHTML(raw.css("title").inner_text)
-        by_info = CGI.unescapeHTML(raw.css("span.rich_media_meta_text.rich_media_meta_nickname").inner_text)
-
-        result = {
-          link: extract_script_value("msg_link") || link,
-          title: title,
-          image: image,
-          description: extract_script_value("msg_desc"),
-          by_info: by_info
-        }
-
-        result
-      end
-    end
-  end
-end
diff --git a/spec/fixtures/wechat-mp.response b/spec/fixtures/wechat-mp.response
deleted file mode 100644
index 7116606..0000000
--- a/spec/fixtures/wechat-mp.response
+++ /dev/null
@@ -1,631 +0,0 @@
-<!DOCTYPE html>
-<!--headTrap<body></body><head></head><html></html>--><html>
-    <head>
-        <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-<meta http-equiv="X-UA-Compatible" content="IE=edge">
-<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0" />
-<meta name="apple-mobile-web-app-capable" content="yes">
-<meta name="apple-mobile-web-app-status-bar-style" content="black">
-<meta name="format-detection" content="telephone=no">
-
-
-        <script nonce="1875567093" type="text/javascript">
-            window.logs = {
-                pagetime: {}
-            };
-            window.logs.pagetime['html_begin'] = (+new Date());
-        </script>
-        
-<script nonce="1875567093" type="text/javascript">
-    var biz = "MjM5NjM4MDAxMg=="||"";
-    var sn = "7c58f17de2c687f4763f17359ecc6e72" || ""|| "";
-    var mid = "2655075181" || ""|| "";
-    var idx = "1" || "" || "";
-    window.__allowLoadResFromMp = true; 
-    
-</script>
-<script nonce="1875567093" type="text/javascript">
-var page_begintime=+new Date,is_rumor="",norumor="";
-1*is_rumor&&!(1*norumor)&&biz&&mid&&(document.referrer&&-1!=document.referrer.indexOf("mp.weixin.qq.com/mp/rumor")||(location.href="http://mp.weixin.qq.com/mp/rumor?action=info&__biz="+biz+"&mid="+mid+"&idx="+idx+"&sn="+sn+"#wechat_redirect")),
-document.domain="qq.com";
-</script>
-<script nonce="1875567093" type="text/javascript">
-var MutationObserver=window.WebKitMutationObserver||window.MutationObserver||window.MozMutationObserver,isDangerSrc=function(t){
-if(t){
-var e=t.match(/http(?:s)?:\/\/([^\/]+?)(\/|$)/);
-if(e&&!/qq\.com(\:8080)?$/.test(e[1])&&!/weishi\.com$/.test(e[1]))return!0;
-}
-return!1;
-},ishttp=0==location.href.indexOf("http://");
--1==location.href.indexOf("safe=0")&&ishttp&&"function"==typeof MutationObserver&&"mp.weixin.qq.com"==location.host&&(window.__observer_data={
-count:0,
-exec_time:0,
-list:[]
-},window.__observer=new MutationObserver(function(t){
-window.__observer_data.count++;
-var e=new Date,r=[];
-t.forEach(function(t){
-for(var e=t.addedNodes,o=0;o<e.length;o++){
-var n=e[o];
-if("SCRIPT"===n.tagName){
-var i=n.src;
-isDangerSrc(i)&&(window.__observer_data.list.push(i),r.push(n)),!i&&window.__nonce_str&&n.getAttribute("nonce")!=window.__nonce_str&&(window.__observer_data.list.push("inlinescript_without_nonce"),
-r.push(n));
-}
-}
-});
-for(var o=0;o<r.length;o++){
-var n=r[o];
-n.parentNode&&n.parentNode.removeChild(n);
-}
-window.__observer_data.exec_time+=new Date-e;
-}),window.__observer.observe(document,{
-subtree:!0,
-childList:!0
-})),function(){
-if(-1==location.href.indexOf("safe=0")&&Math.random()<.01&&ishttp&&HTMLScriptElement.prototype.__lookupSetter__&&"undefined"!=typeof Object.defineProperty){
-window.__danger_src={
-xmlhttprequest:[],
-script_src:[],
-script_setAttribute:[]
-};
-var t="$"+Math.random();
-HTMLScriptElement.prototype.__old_method_script_src=HTMLScriptElement.prototype.__lookupSetter__("src"),
-HTMLScriptElement.prototype.__defineSetter__("src",function(t){
-t&&isDangerSrc(t)&&window.__danger_src.script_src.push(t),this.__old_method_script_src(t);
-});
-var e="element_setAttribute"+t;
-Object.defineProperty(Element.prototype,e,{
-value:Element.prototype.setAttribute,
-enumerable:!1
-}),Element.prototype.setAttribute=function(t,r){
-"SCRIPT"==this.tagName&&"src"==t&&isDangerSrc(r)&&window.__danger_src.script_setAttribute.push(r),
-this[e](t,r);
-};
-}
-}();
-</script>
-
-        <link rel="dns-prefetch" href="//res.wx.qq.com">
-<link rel="dns-prefetch" href="//mmbiz.qpic.cn">
-<link rel="shortcut icon" type="image/x-icon" href="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/images/icon/common/favicon22c41b.ico">
-<script nonce="1875567093" type="text/javascript">
-    String.prototype.html = function(encode) {
-        var replace =["&#39;", "'", "&quot;", '"', "&nbsp;", " ", "&gt;", ">", "&lt;", "<", "&amp;", "&", "&yen;", "¥"];
-        if (encode) {
-            replace.reverse();
-        }
-        for (var i=0,str=this;i< replace.length;i+= 2) {
-             str=str.replace(new RegExp(replace[i],'g'),replace[i+1]);
-        }
-        return str;
-    };
-
-    window.isInWeixinApp = function() {
-        return /MicroMessenger/.test(navigator.userAgent);
-    };
-
-    window.getQueryFromURL = function(url) {
-        url = url || 'http://qq.com/s?a=b#rd'; 
-        var tmp = url.split('?'),
-            query = (tmp[1] || "").split('#')[0].split('&'),
-            params = {};
-        for (var i=0; i<query.length; i++) {
-            var arg = query[i].split('=');
-            params[arg[0]] = arg[1];
-        }
-        if (params['pass_ticket']) {
-        	params['pass_ticket'] = encodeURIComponent(params['pass_ticket'].html(false).html(false).replace(/\s/g,"+"));
-        }
-        return params;
-    };
-
-    (function() {
-	    var params = getQueryFromURL(location.href);
-        window.uin = params['uin'] || "" || '';
-        window.key = params['key'] || "" || '';
-        window.wxtoken = params['wxtoken'] || '';

[... diff too long, it was truncated ...]

GitHub sha: c9af3f72

This commit appears in #428 which was merged by techAPJ.