FEATURE: Add uploads:batch_migrate_from_s3 task to limit total posts migrated at once (#9933)

Allow limiting how many posts are migrated in a single run, both so a large migration can be spread across several off-peak windows to reduce user impact, and so tests can migrate only a handful of posts. (“Give me a ping, Vasili. One ping only, please.”)
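
The limit is passed as an ordinary Rake task argument and therefore arrives as a string, which is why the task calls limit.to_i before applying it. A hypothetical invocation that migrates at most 500 posts (the number is only an example, not something from the commit) looks like this:

  # From the shell (quote the brackets under zsh):
  #   rake "uploads:batch_migrate_from_s3[500]"
  # Or programmatically, which is how the new spec drives it:
  Rake::Task["uploads:batch_migrate_from_s3"].invoke("500")

Omitting the argument leaves limit nil, so the task behaves exactly like uploads:migrate_from_s3.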

diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index f025346..35cd318 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -93,6 +93,10 @@ task "uploads:migrate_from_s3" => :environment do
   ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
 end
 
+task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args|
+  ENV["RAILS_DB"] ? migrate_from_s3(limit: args[:limit]) : migrate_all_from_s3(limit: args[:limit])
+end
+
 def guess_filename(url, raw)
   begin
     uri = URI.parse("http:#{url}")
@@ -110,17 +114,17 @@ def guess_filename(url, raw)
   end
 end
 
-def migrate_all_from_s3
-  RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3 }
+def migrate_all_from_s3(limit: nil)
+  RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3(limit: limit) }
 end
 
-def migrate_from_s3
+def migrate_from_s3(limit: nil)
   require "file_store/s3_store"
 
   # make sure S3 is disabled
   if SiteSetting.Upload.enable_s3_uploads
     puts "You must disable S3 uploads before running that task."
-    return
+    exit 1
   end
 
   db = RailsMultisite::ConnectionManagement.current_db
@@ -129,10 +133,12 @@ def migrate_from_s3
 
   max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
 
-  Post
+  migrate_posts = Post
     .where("user_id > 0")
-    .where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%(upload://%'")
-    .find_each do |post|
+    .where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'")
+  migrate_posts = migrate_posts.limit(limit.to_i) if limit
+
+  migrate_posts.find_each do |post|
     begin
       updated = false
 
diff --git a/spec/tasks/uploads_spec.rb b/spec/tasks/uploads_spec.rb
index cbbd5b1..8cc0838 100644
--- a/spec/tasks/uploads_spec.rb
+++ b/spec/tasks/uploads_spec.rb
@@ -134,6 +134,115 @@ RSpec.describe "tasks/uploads" do
     end
   end
 
+  describe "uploads:batch_migrate_from_s3" do
+    let!(:uploads) do
+      [
+        upload1,
+        upload2,
+      ]
+    end
+
+    let(:upload1) { Fabricate(:upload_s3) }
+    let(:upload2) { Fabricate(:upload_s3) }
+
+    let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
+    let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
+
+    let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
+    let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
+
+    before do
+      global_setting :s3_bucket, 'file-uploads/folder'
+      global_setting :s3_region, 'us-east-1'
+      enable_s3_uploads(uploads)
+      upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
+      upload1.save!
+      upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
+      upload2.save!
+
+      PostUpload.create(post: post1, upload: upload1)
+      PostUpload.create(post: post2, upload: upload2)
+      SiteSetting.enable_s3_uploads = false
+    end
+
+    def invoke_task
+      capture_stdout do
+        Rake::Task['uploads:batch_migrate_from_s3'].invoke('1')
+      end
+    end
+
+    it "applies the limit" do
+      FileHelper.stubs(:download).returns(file_from_fixtures("logo.png")).once()
+
+      freeze_time
+
+      post1.update_columns(baked_at: 1.week.ago)
+      post2.update_columns(baked_at: 1.week.ago)
+      invoke_task
+
+      expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
+      expect(post2.reload.baked_at).to eq_time(1.week.ago)
+    end
+
+  end
+
+  describe "uploads:migrate_from_s3" do
+    let!(:uploads) do
+      [
+        upload1,
+        upload2,
+      ]
+    end
+
+    let(:upload1) { Fabricate(:upload_s3) }
+    let(:upload2) { Fabricate(:upload_s3) }
+
+    let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
+    let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
+
+    let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
+    let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
+
+    before do
+      global_setting :s3_bucket, 'file-uploads/folder'
+      global_setting :s3_region, 'us-east-1'
+      enable_s3_uploads(uploads)
+      upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
+      upload1.save!
+      upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
+      upload2.save!
+
+      PostUpload.create(post: post1, upload: upload1)
+      PostUpload.create(post: post2, upload: upload2)
+      SiteSetting.enable_s3_uploads = false
+    end
+
+    def invoke_task
+      capture_stdout do
+        Rake::Task['uploads:migrate_from_s3'].invoke
+      end
+    end
+
+    it "fails if s3 uploads are still enabled" do
+      SiteSetting.enable_s3_uploads = true
+      expect { invoke_task }.to raise_error(SystemExit)
+    end
+
+    it "does not apply a limit" do
+      FileHelper.stubs(:download).with("http:#{upload1.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
+      FileHelper.stubs(:download).with("http:#{upload2.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
+
+      freeze_time
+
+      post1.update_columns(baked_at: 1.week.ago)
+      post2.update_columns(baked_at: 1.week.ago)
+      invoke_task
+
+      expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
+      expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
+    end
+  end
+
   describe "uploads:disable_secure_media" do
     def invoke_task
       capture_stdout do

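The new "applies the limit" spec exercises this end to end: it freezes time, back-dates baked_at on two posts that each reference an S3 upload, invokes the batch task with a limit of 1, and asserts that exactly one post was rebaked (its baked_at changed) while the other was left untouched. Running without a limit, as the uploads:migrate_from_s3 spec does, rebakes both posts.
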
GitHub sha: 81e6bc7a

This commit appears in #9933, which was approved by ZogStriP and martin and merged by martin.