FEATURE: Pull hotlinked images in user bios (#14726)

FEATURE: Pull hotlinked images in user bios (#14726)

diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index 934941c..b532366 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -241,7 +241,7 @@ module Jobs
       )
     end
 
-    private
+    protected
 
     def normalize_src(src)
       uri = Addressable::URI.heuristic_parse(src)
diff --git a/app/jobs/regular/pull_user_profile_hotlinked_images.rb b/app/jobs/regular/pull_user_profile_hotlinked_images.rb
new file mode 100644
index 0000000..ae87a90
--- /dev/null
+++ b/app/jobs/regular/pull_user_profile_hotlinked_images.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Pulls the images referenced in a user's cooked bio and points the raw bio at the resulting uploads.
+  class PullUserProfileHotlinkedImages < ::Jobs::PullHotlinkedImages
+    # Expects args[:user_id]; raises Discourse::InvalidParameters when it is blank.
+    def execute(args)
+      @user_id = args[:user_id]
+      raise Discourse::InvalidParameters.new(:user_id) if @user_id.blank?
+
+      profile = UserProfile.find_by(user_id: @user_id)
+      return if profile.blank?
+
+      uploads_by_src = {}
+      too_large_srcs = []
+      broken_srcs = []
+
+      extract_images_from(profile.bio_cooked).each do |node|
+        download_src = original_src = node['src'] || node['href']
+        if original_src.start_with?("//")
+          scheme = SiteSetting.force_https ? "https" : "http"
+          download_src = "#{scheme}:#{original_src}"
+        end
+        normalized_src = normalize_src(download_src)
+        next if !should_download_image?(download_src)
+
+        begin
+          seen = [uploads_by_src, too_large_srcs, broken_srcs].any? { |known| known.include?(normalized_src) }
+          uploads_by_src[normalized_src] = attempt_download(download_src, @user_id) if !seen
+        rescue ImageTooLargeError
+          too_large_srcs << normalized_src
+        rescue ImageBrokenError
+          broken_srcs << normalized_src
+        end
+
+        upload = uploads_by_src[normalized_src]
+        profile.bio_raw = replace_in_raw(original_src: original_src, upload: upload, raw: profile.bio_raw) if upload
+      rescue => e
+        raise e if Rails.env.test?
+        log(:error, "Failed to pull hotlinked image (#{download_src}) user: #{@user_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
+      end
+
+      # Set the skip flag before saving so the after_save hook does not enqueue this job again.
+      profile.skip_pull_hotlinked_image = true
+      profile.save!
+    end
+  end
+end
diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb
index 770ff71..db666f7 100644
--- a/app/jobs/scheduled/clean_up_uploads.rb
+++ b/app/jobs/scheduled/clean_up_uploads.rb
@@ -38,6 +38,7 @@ module Jobs
           encoded_sha = Base62.encode(upload.sha1.hex)
           next if ReviewableQueuedPost.pending.where("payload->>'raw' LIKE '%#{upload.sha1}%' OR payload->>'raw' LIKE '%#{encoded_sha}%'").exists?
           next if Draft.where("data LIKE '%#{upload.sha1}%' OR data LIKE '%#{encoded_sha}%'").exists?
+          next if UserProfile.where("bio_raw LIKE '%#{upload.sha1}%' OR bio_raw LIKE '%#{encoded_sha}%'").exists?
           if defined?(ChatMessage) &&
               ChatMessage.where("message LIKE ? OR message LIKE ?", "%#{upload.sha1}%", "%#{encoded_sha}%").exists?
             next
diff --git a/app/models/user_profile.rb b/app/models/user_profile.rb
index 574f295..4b2f4f6 100644
--- a/app/models/user_profile.rb
+++ b/app/models/user_profile.rb
@@ -12,6 +12,7 @@ class UserProfile < ActiveRecord::Base
   validates :user, presence: true
   before_save :cook
   after_save :trigger_badges
+  after_save :pull_hotlinked_image
 
   validate :website_domain_validator, if: Proc.new { |c| c.new_record? || c.website_changed? }
 
@@ -19,6 +20,8 @@ class UserProfile < ActiveRecord::Base
 
   BAKED_VERSION = 1
 
+  attr_accessor :skip_pull_hotlinked_image
+
   def bio_excerpt(length = 350, opts = {})
     return nil if bio_cooked.blank?
     excerpt = PrettyText.excerpt(bio_cooked, length, opts).sub(/<br>$/, '')
@@ -113,6 +116,16 @@ class UserProfile < ActiveRecord::Base
     BadgeGranter.queue_badge_grant(Badge::Trigger::UserChange, user: self)
   end
 
+  # after_save hook: unless the skip flag is set, and only when this save
+  # changed bio_raw, enqueue :pull_user_profile_hotlinked_images for this
+  # user, passing SiteSetting.editing_grace_period as the enqueue_in delay.
+  def pull_hotlinked_image
+    return if skip_pull_hotlinked_image || !saved_change_to_bio_raw?
+
+    delay = SiteSetting.editing_grace_period
+    Jobs.enqueue_in(delay, :pull_user_profile_hotlinked_images, user_id: user_id)
+  end
+
   private
 
   def cooked
diff --git a/spec/jobs/pull_user_profile_hotlinked_images_spec.rb b/spec/jobs/pull_user_profile_hotlinked_images_spec.rb
new file mode 100644
index 0000000..3f5a41f
--- /dev/null
+++ b/spec/jobs/pull_user_profile_hotlinked_images_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+describe Jobs::PullUserProfileHotlinkedImages do
+  fab!(:user) { Fabricate(:user) }
+
+  let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
+  # NOTE(review): this payload starts with a GIF89a header although it is named png and served as image/png.
+  let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
+
+  before do
+    stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
+    SiteSetting.download_remote_images_to_local = true
+  end
+
+  describe '#execute' do
+    before { stub_image_size }
+
+    it 'replaces images' do
+      user.user_profile.update!(bio_raw: "![](#{image_url})")
+      # The job must add exactly one Upload, and the cooked bio must then include its URL.
+      expect { described_class.new.execute(user_id: user.id) }.to change(Upload, :count).by(1)
+      expect(user.user_profile.reload.bio_cooked).to include(Upload.last.url)
+    end
+  end
+end

GitHub sha: 1c3c0f04d910483e6d36917e56ef83e01539f10b

This commit appears in #14726, which was approved by eviltrout. It was merged by nbianca.