FIX: Improvements and fixes to the image downsizing script (#9950)

FIX: Improvements and fixes to the image downsizing script (#9950)

Fixed bugs, added specs, extracted the upload downsizing code to a class, added support for non-S3 setups, changed it so that images aren’t downloaded twice.

This code has been tested on production and successfully resized ~180k uploads.

Includes:

  • DEV: Extract upload downsizing logic
  • DEV: Add support for non-S3 uploads
  • DEV: Process only images uploaded by users
  • FIX: Incorrect usage of count and exist? typo
  • DEV: Spec S3 image downsizing
  • DEV: Avoid downloading images twice
  • DEV: Update filesizes earlier in the process
  • DEV: Return false on invalid upload
  • FIX: Download images that currently above the limit (If the image size limit is decreased, then there was no way to resize those images that now fall outside the allowed size range)
  • Update script/downsize_uploads.rb (Co-authored-by: Régis Hanol regis@hanol.fr)
diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb
index 7d22640..504267e 100644
--- a/lib/file_store/base_store.rb
+++ b/lib/file_store/base_store.rb
@@ -76,13 +76,13 @@ module FileStore
       not_implemented
     end
 
-    def download(upload)
+    def download(upload, max_file_size_kb: nil)
       DistributedMutex.synchronize("download_#{upload.sha1}", validity: 3.minutes) do
         filename = "#{upload.sha1}#{File.extname(upload.original_filename)}"
         file = get_from_cache(filename)
 
         if !file
-          max_file_size_kb = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
+          max_file_size_kb ||= [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
 
           url = upload.secure? ?
             Discourse.store.signed_url_for_path(upload.url) :
diff --git a/lib/shrink_uploaded_image.rb b/lib/shrink_uploaded_image.rb
new file mode 100644
index 0000000..e5f57eb
--- /dev/null
+++ b/lib/shrink_uploaded_image.rb
@@ -0,0 +1,236 @@
+# frozen_string_literal: true
+
+class ShrinkUploadedImage
+  attr_reader :upload, :path
+
+  def initialize(upload:, path:, max_pixels:, verbose: false, interactive: false)
+    @upload = upload
+    @path = path
+    @max_pixels = max_pixels
+    @verbose = verbose
+    @interactive = interactive
+  end
+
+  def perform
+    OptimizedImage.downsize(path, path, "#{@max_pixels}@", filename: upload.original_filename)
+    sha1 = Upload.generate_digest(path)
+
+    if sha1 == upload.sha1
+      log "No sha1 change"
+      return false
+    end
+
+    w, h = FastImage.size(path, timeout: 15, raise_on_failure: true)
+
+    if !w || !h
+      log "Invalid image dimensions after resizing"
+      return false
+    end
+
+    # Neither #dup or #clone provide a complete copy
+    original_upload = Upload.find(upload.id)
+    ww, hh = ImageSizer.resize(w, h)
+
+    # A different upload record that matches the sha1 of the downsized image
+    existing_upload = Upload.find_by(sha1: sha1)
+    @upload = existing_upload if existing_upload
+
+    upload.attributes = {
+      sha1: sha1,
+      width: w,
+      height: h,
+      thumbnail_width: ww,
+      thumbnail_height: hh,
+      filesize: File.size(path)
+    }
+
+    if upload.filesize > upload.filesize_was
+      log "No filesize reduction"
+      return false
+    end
+
+    unless existing_upload
+      url = Discourse.store.store_upload(File.new(path), upload)
+
+      unless url
+        log "Couldn't store the upload"
+        return false
+      end
+
+      upload.url = url
+    end
+
+    log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}"
+    log "sha: #{original_upload.sha1} -> #{sha1}"
+    log "(an exisiting upload)" if existing_upload
+
+    success = true
+    posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at)
+
+    posts.each do |post|
+      transform_post(post, original_upload, upload)
+
+      if post.custom_fields[Post::DOWNLOADED_IMAGES].present?
+        downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
+      end
+
+      if post.raw_changed?
+        log "Updating post"
+      elsif downloaded_images&.has_value?(original_upload.id)
+        log "A hotlinked, unreferenced image"
+      elsif post.raw.include?(upload.short_url)
+        log "Already processed"
+      elsif post.trashed?
+        log "A deleted post"
+      elsif !post.topic || post.topic.trashed?
+        log "A deleted topic"
+      elsif post.cooked.include?(original_upload.sha1)
+        if post.raw.include?("#{Discourse.base_url.sub(/^https?:\/\//i, "")}/t/")
+          log "Updating a topic onebox"
+        else
+          log "Updating an external onebox"
+        end
+      else
+        log "Could not find the upload URL"
+        success = false
+      end
+
+      log "#{Discourse.base_url}/p/#{post.id}"
+    end
+
+    if posts.empty?
+      log "Upload not used in any posts"
+
+      if User.where(uploaded_avatar_id: original_upload.id).exists?
+        log "Used as a User avatar"
+      elsif UserAvatar.where(gravatar_upload_id: original_upload.id).exists?
+        log "Used as a UserAvatar gravatar"
+      elsif UserAvatar.where(custom_upload_id: original_upload.id).exists?
+        log "Used as a UserAvatar custom upload"
+      elsif UserProfile.where(profile_background_upload_id: original_upload.id).exists?
+        log "Used as a UserProfile profile background"
+      elsif UserProfile.where(card_background_upload_id: original_upload.id).exists?
+        log "Used as a UserProfile card background"
+      elsif Category.where(uploaded_logo_id: original_upload.id).exists?
+        log "Used as a Category logo"
+      elsif Category.where(uploaded_background_id: original_upload.id).exists?
+        log "Used as a Category background"
+      elsif CustomEmoji.where(upload_id: original_upload.id).exists?
+        log "Used as a CustomEmoji"
+      elsif ThemeField.where(upload_id: original_upload.id).exists?
+        log "Used as a ThemeField"
+      else
+        success = false
+      end
+    end
+
+    unless success
+      if @interactive
+        print "Press any key to continue with the upload"
+        STDIN.beep
+        STDIN.getch
+        puts " k"
+      else
+        if !existing_upload && !Upload.where(url: upload.url).exists?
+          # We're bailing, so clean up the just uploaded file
+          Discourse.store.remove_upload(upload)
+        end
+
+        log "⏩ Skipping"
+        return false
+      end
+    end
+
+    unless upload.save
+      if !existing_upload && !Upload.where(url: upload.url).exists?
+        # We're bailing, so clean up the just uploaded file
+        Discourse.store.remove_upload(upload)
+      end
+
+      log "⏩ Skipping an invalid upload"
+      return false
+    end
+
+    if existing_upload
+      begin
+        PostUpload.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
+      rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation
+      end
+
+      User.where(uploaded_avatar_id: original_upload.id).update_all(uploaded_avatar_id: upload.id)
+      UserAvatar.where(gravatar_upload_id: original_upload.id).update_all(gravatar_upload_id: upload.id)
+      UserAvatar.where(custom_upload_id: original_upload.id).update_all(custom_upload_id: upload.id)
+      UserProfile.where(profile_background_upload_id: original_upload.id).update_all(profile_background_upload_id: upload.id)
+      UserProfile.where(card_background_upload_id: original_upload.id).update_all(card_background_upload_id: upload.id)
+      Category.where(uploaded_logo_id: original_upload.id).update_all(uploaded_logo_id: upload.id)
+      Category.where(uploaded_background_id: original_upload.id).update_all(uploaded_background_id: upload.id)
+      CustomEmoji.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
+      ThemeField.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
+    else
+      upload.optimized_images.each(&:destroy!)
+    end
+
+    posts.each do |post|
+      DistributedMutex.synchronize("process_post_#{post.id}") do
+        current_post = Post.unscoped.find(post.id)
+
+        # If the post became outdated, reapply changes
+        if current_post.updated_at != post.updated_at
+          transform_post(current_post, original_upload, upload)
+          post = current_post
+        end
+
+        if post.raw_changed?
+          post.update_columns(
+            raw: post.raw,
+            updated_at: Time.zone.now
+          )
+        end
+
+        if existing_upload && post.custom_fields[Post::DOWNLOADED_IMAGES].present?
+          downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
+
+          downloaded_images.transform_values! do |upload_id|
+            upload_id == original_upload.id ? upload.id : upload_id
+          end
+

[... diff too long, it was truncated ...]

GitHub sha: 3d55f2e3

This commit appears in #9950 which was approved by ZogStriP. It was merged by CvX.