FIX: Re-download hotlinked optimized images (#7249)

FIX: Re-download hotlinked optimized images (#7249)

  • FIX: Download local images, even if download remote is disabled
diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index cc9cc05..fd8a724 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -35,8 +35,6 @@ module Jobs
     end
 
     def execute(args)
-      return unless SiteSetting.download_remote_images_to_local?
-
       post_id = args[:post_id]
       raise Discourse::InvalidParameters.new(:post_id) unless post_id.present?
 
@@ -60,7 +58,7 @@ module Jobs
         src = original_src = image['src']
         src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
 
-        if is_valid_image_url(src)
+        if should_download_image?(src)
           begin
             # have we already downloaded that file?
             schemeless_src = remove_scheme(original_src)
@@ -136,16 +134,21 @@ module Jobs
 
     def extract_images_from(html)
       doc = Nokogiri::HTML::fragment(html)
-      doc.css("img[src]") - doc.css("img.avatar")
+      doc.css("img[src]") - doc.css("img.avatar") - doc.css(".lightbox img[src]")
     end
 
-    def is_valid_image_url(src)
+    def should_download_image?(src)
       # make sure we actually have a url
       return false unless src.present?
-      # we don't want to pull uploaded images
-      return false if Discourse.store.has_been_uploaded?(src)
-      # we don't want to pull relative images
-      return false if src =~ /\A\/[^\/]/i
+
+      # If file is on the forum or CDN domain
+      if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i
+        # Return true if we can't find the upload in the db
+        return !Upload.get_from_url(src)
+      end
+
+      # Don't download non-local images unless site setting enabled
+      return false unless SiteSetting.download_remote_images_to_local?
 
       # parse the src
       begin
@@ -157,11 +160,6 @@ module Jobs
       hostname = uri.hostname
       return false unless hostname
 
-      # we don't want to pull images hosted on the CDN (if we use one)
-      return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == hostname
-      return false if SiteSetting.Upload.s3_cdn_url.present? && URI.parse(SiteSetting.Upload.s3_cdn_url).hostname == hostname
-      # we don't want to pull images hosted on the main domain
-      return false if URI.parse(Discourse.base_url_no_prefix).hostname == hostname
       # check the domains blacklist
       SiteSetting.should_download_images?(src)
     end
diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb
index ee74fba..bd40c41 100644
--- a/spec/jobs/pull_hotlinked_images_spec.rb
+++ b/spec/jobs/pull_hotlinked_images_spec.rb
@@ -124,22 +124,65 @@ describe Jobs::PullHotlinkedImages do
     end
   end
 
-  describe '#is_valid_image_url' do
+  describe '#should_download_image?' do
     subject { described_class.new }
 
     describe 'when url is invalid' do
       it 'should return false' do
-        expect(subject.is_valid_image_url("null")).to eq(false)
-        expect(subject.is_valid_image_url("meta.discourse.org")).to eq(false)
+        expect(subject.should_download_image?("null")).to eq(false)
+        expect(subject.should_download_image?("meta.discourse.org")).to eq(false)
       end
     end
 
     describe 'when url is valid' do
       it 'should return true' do
-        expect(subject.is_valid_image_url("http://meta.discourse.org")).to eq(true)
-        expect(subject.is_valid_image_url("//meta.discourse.org")).to eq(true)
+        expect(subject.should_download_image?("http://meta.discourse.org")).to eq(true)
+        expect(subject.should_download_image?("//meta.discourse.org")).to eq(true)
       end
     end
+
+    describe 'when url is an upload' do
+      it 'should return false for original' do
+        expect(subject.should_download_image?(Fabricate(:upload).url)).to eq(false)
+      end
+
+      it 'should return true for optimized' do
+        src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image))
+        expect(subject.should_download_image?(src)).to eq(true)
+      end
+    end
+
+    context "when download_remote_images_to_local? is false" do
+      before do
+        SiteSetting.download_remote_images_to_local = false
+      end
+
+      it "still returns true for optimized" do
+        src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image))
+        expect(subject.should_download_image?(src)).to eq(true)
+      end
+
+      it 'returns false for valid remote URLs' do
+        expect(subject.should_download_image?("http://meta.discourse.org")).to eq(false)
+      end
+    end
+  end
+
+  describe "with a lightboxed image" do
+    let(:upload) { Fabricate(:upload) }
+
+    before do
+      FastImage.expects(:size).returns([1750, 2000])
+      OptimizedImage.stubs(:resize).returns(true)
+    end
+
+    it "doesn't remove optimized images from lightboxes" do
+      post = Fabricate(:post, raw: "![alt](#{upload.short_url})")
+      Jobs::ProcessPost.new.execute(post_id: post.id)
+
+      expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }.not_to change { Upload.count }
+      expect(post.reload.cooked).to include "/uploads/default/optimized/" # Ensure the lightbox was actually rendered
+    end
   end
 
 end

GitHub sha: 95d58192