FEATURE: Replace SimpleRSS with Ruby RSS module (#5311)

FEATURE: Replace SimpleRSS with Ruby RSS module (#5311)

  • SPEC: PollFeedJob parsing atom feed

  • add FeedItemAccessor

It provides a consistent interface for accessing a feed item’s tag content.

  • add FeedElementInstaller

It installs non-standard and non-namespaced feed elements.

  • FEATURE: replace SimpleRSS with Ruby RSS module

  • resolve the feed URL with FinalDestination and download it with Excon

  • support namespaced element with FeedElementInstaller

diff --git a/Gemfile b/Gemfile
index b17aebb..92793be 100644
--- a/Gemfile
+++ b/Gemfile
@@ -166,7 +166,6 @@ gem 'gc_tracer', require: false, platform: :mri
 
 # required for feed importing and embedding
 gem 'ruby-readability', require: false
-gem 'simple-rss', require: false
 
 gem 'stackprof', require: false, platform: :mri
 gem 'memory_profiler', require: false, platform: :mri
diff --git a/Gemfile.lock b/Gemfile.lock
index 12984fa..ccbe724 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -368,7 +368,6 @@ GEM
       connection_pool (~> 2.2, >= 2.2.0)
       rack-protection (>= 1.5.0)
       redis (>= 3.3.4, < 5)
-    simple-rss (1.3.1)
     slop (3.6.0)
     sprockets (3.7.1)
       concurrent-ruby (~> 1.0)
@@ -500,7 +499,6 @@ DEPENDENCIES
   seed-fu
   shoulda
   sidekiq
-  simple-rss
   sprockets-rails
   stackprof
   thor
diff --git a/app/jobs/scheduled/poll_feed.rb b/app/jobs/scheduled/poll_feed.rb
index 798cbff..2f885c6 100644
--- a/app/jobs/scheduled/poll_feed.rb
+++ b/app/jobs/scheduled/poll_feed.rb
@@ -2,9 +2,13 @@
 # Creates and Updates Topics based on an RSS or ATOM feed.
 #
 require 'digest/sha1'
+require 'excon'
+require 'rss'
+require_dependency 'feed_item_accessor'
+require_dependency 'feed_element_installer'
+require_dependency 'final_destination'
 require_dependency 'post_creator'
 require_dependency 'post_revisor'
-require 'open-uri'
 
 module Jobs
   class PollFeed < Jobs::Scheduled
@@ -46,17 +50,11 @@ module Jobs
 
     def import_topic(topic)
       if topic.user
-        TopicEmbed.import(topic.user, topic.url, topic.title, CGI.unescapeHTML(topic.content.scrub))
+        TopicEmbed.import(topic.user, topic.url, topic.title, CGI.unescapeHTML(topic.content))
       end
     end
 
     class Feed
-      require 'simple-rss'
-
-      if SiteSetting.embed_username_key_from_feed.present?
-        SimpleRSS.item_tags << SiteSetting.embed_username_key_from_feed.to_sym
-      end
-
       def initialize
         @feed_url = SiteSetting.feed_polling_url
         @feed_url = "http://#{@feed_url}" if @feed_url !~ /^https?\:\/\//
@@ -65,7 +63,7 @@ module Jobs
       def topics
         feed_topics = []
 
-        rss = fetch_rss
+        rss = parsed_feed
         return feed_topics unless rss.present?
 
         rss.items.each do |i|
@@ -78,36 +76,56 @@ module Jobs
 
       private
 
-      def fetch_rss
-        SimpleRSS.parse open(@feed_url, allow_redirections: :all)
-      rescue OpenURI::HTTPError, SimpleRSSError
+      def parsed_feed
+        raw_feed = fetch_rss
+        return nil if raw_feed.blank?
+
+        if SiteSetting.embed_username_key_from_feed.present?
+          FeedElementInstaller.install(SiteSetting.embed_username_key_from_feed, raw_feed)
+        end
+
+        RSS::Parser.parse(raw_feed)
+      rescue RSS::NotWellFormedError, RSS::InvalidRSSError
         nil
       end
 
+      def fetch_rss
+        final_destination = FinalDestination.new(@feed_url, verbose: true)
+        feed_final_url = final_destination.resolve
+        return nil unless final_destination.status == :resolved
+
+        Excon.new(feed_final_url.to_s).request(method: :get, expects: 200).body
+      rescue Excon::Error::HTTPStatus
+        nil
+      end
     end
 
     class FeedTopic
       def initialize(article_rss_item)
-        @article_rss_item = article_rss_item
+        @accessor = FeedItemAccessor.new(article_rss_item)
       end
 
       def url
-        link = @article_rss_item.link
+        link = @accessor.link
         if url?(link)
           return link
         else
-          return @article_rss_item.id
+          return @accessor.element_content(:id)
         end
       end
 
       def content
-        @article_rss_item.content_encoded&.force_encoding("UTF-8")&.scrub ||
-          @article_rss_item.content&.force_encoding("UTF-8")&.scrub ||
-          @article_rss_item.description&.force_encoding("UTF-8")&.scrub
+        content = nil
+
+        %i[content_encoded content description].each do |content_element_name|
+          content ||= @accessor.element_content(content_element_name)
+        end
+
+        content&.force_encoding('UTF-8')&.scrub
       end
 
       def title
-        @article_rss_item.title.force_encoding("UTF-8").scrub
+        @accessor.element_content(:title).force_encoding('UTF-8').scrub
       end
 
       def user
@@ -125,11 +143,7 @@ module Jobs
       end
 
       def author_username
-        begin
-          @article_rss_item.send(SiteSetting.embed_username_key_from_feed.to_sym)
-        rescue
-          nil
-        end
+        @accessor.element_content(SiteSetting.embed_username_key_from_feed.sub(':', '_'))
       end
 
       def default_user
@@ -145,9 +159,6 @@ module Jobs
       def find_user(user_name)
         User.where(username_lower: user_name).first
       end
-
     end
-
   end
-
 end
diff --git a/lib/feed_element_installer.rb b/lib/feed_element_installer.rb
new file mode 100644
index 0000000..c9354e4
--- /dev/null
+++ b/lib/feed_element_installer.rb
@@ -0,0 +1,52 @@
+require 'rexml/document'
+require 'rss'
+
+class FeedElementInstaller
+  private_class_method :new
+
+  def self.install(element_name, feed)
+    # RSS Specification at http://cyber.harvard.edu/rss/rss.html#extendingRss
+    # > A RSS feed may contain [non-standard elements], only if those elements are *defined in a namespace*
+
+    new(element_name, feed).install if element_name.include?(':')
+  end
+
+  attr_reader :feed, :original_name, :element_namespace, :element_name, :element_accessor
+
+  def initialize(element_name, feed)
+    @feed = feed
+    @original_name = element_name
+    @element_namespace, @element_name = *element_name.split(':')
+    @element_accessor = "#{@element_namespace}_#{@element_name}"
+  end
+
+  def element_uri
+    @element_uri ||= REXML::Document.new(feed).root&.attributes&.namespaces&.fetch(@element_namespace, '') || ''
+  end
+
+  def install
+    install_in_rss unless installed_in_rss?
+    install_in_atom unless installed_in_atom?
+  end
+
+  private
+
+  def install_in_rss
+    RSS::Rss::Channel::Item.install_text_element(element_name, element_uri, '?', element_accessor, nil, original_name)
+    RSS::BaseListener.install_get_text_element(element_uri, element_name, element_accessor)
+  end
+
+  def install_in_atom
+    RSS::Atom::Entry.install_text_element(element_name, element_uri, '?', element_accessor, nil, original_name)
+    RSS::Atom::Feed::Entry.install_text_element(element_name, element_uri, '?', element_accessor, nil, original_name)
+    RSS::BaseListener.install_get_text_element(element_uri, element_name, element_accessor)
+  end
+
+  def installed_in_rss?
+    RSS::Rss::Channel::Item.method_defined?(element_accessor)
+  end
+
+  def installed_in_atom?
+    RSS::Atom::Entry.method_defined?(element_accessor) || RSS::Atom::Feed::Entry.method_defined?(element_accessor)
+  end
+end
diff --git a/lib/feed_item_accessor.rb b/lib/feed_item_accessor.rb
new file mode 100644
index 0000000..36acabc
--- /dev/null
+++ b/lib/feed_item_accessor.rb
@@ -0,0 +1,25 @@
+class FeedItemAccessor
+  attr_accessor :rss_item
+
+  def initialize(rss_item)
+    @rss_item = rss_item
+  end
+
+  def element_content(element_name)
+    try_attribute_or_self(element(element_name), :content)
+  end
+
+  def link
+    try_attribute_or_self(element(:link), :href)
+  end
+
+  private
+
+  def element(element_name)
+    rss_item.respond_to?(element_name) ? rss_item.send(element_name) : nil
+  end
+
+  def try_attribute_or_self(element, attribute_name)
+    element.respond_to?(attribute_name) ? element.send(attribute_name) : element
+  end
+end
diff --git a/spec/components/feed_element_installer_spec.rb b/spec/components/feed_element_installer_spec.rb
new file mode 100644
index 0000000..48d6eb0
--- /dev/null
+++ b/spec/components/feed_element_installer_spec.rb
@@ -0,0 +1,40 @@
+require 'feed_element_installer'
+require 'rails_helper'
+
+describe FeedElementInstaller do
+  describe '#install_rss_element' do

[... diff too long, it was truncated ...]

GitHub sha: 5f318a52

This commit has been mentioned on Discourse Meta. There might be relevant details there:

https://meta.discourse.org/t/autobot-automatic-content-creator/71756/70

This commit has been mentioned on Discourse Meta. There might be relevant details there:

https://meta.discourse.org/t/configure-the-discourse-rss-polling-plugin/156387/6