FEATURE: Migrate uploads to S3 during restore

diff --git a/lib/backup_restore/restorer.rb b/lib/backup_restore/restorer.rb
index b7f7cdd..a5ea6a0 100644
--- a/lib/backup_restore/restorer.rb
+++ b/lib/backup_restore/restorer.rb
@@ -448,19 +448,38 @@ module BackupRestore
             DbHelper.remap("uploads/#{previous_db_name}", "uploads/#{current_db_name}")
           end
 
+          if SiteSetting.Upload.enable_s3_uploads
+            migrate_to_s3
+            remove_local_uploads(File.join(public_uploads_path, "uploads/#{current_db_name}"))
+          end
+
           generate_optimized_images unless optimized_images_exist
         end
       end
     end
 
+    def migrate_to_s3
+      log "Migrating uploads to S3..."
+      ENV["SKIP_FAILED"] = "1"
+      ENV["MIGRATE_TO_MULTISITE"] = "1" if Rails.configuration.multisite
+      Rake::Task["uploads:migrate_to_s3"].invoke
+    end
+
+    def remove_local_uploads(directory)
+      log "Removing local uploads directory..."
+      FileUtils.rm_rf(directory) if Dir[directory].present?
+    rescue => ex
+      log "Something went wrong while removing the following uploads directory: #{directory}", ex
+    end
+
     def generate_optimized_images
       log 'Optimizing site icons...'
+      DB.exec("TRUNCATE TABLE optimized_images")
       SiteIconManager.ensure_optimized!
 
       log 'Posts will be rebaked by a background job in sidekiq. You will see missing images until that has completed.'
       log 'You can expedite the process by manually running "rake posts:rebake_uncooked_posts"'
 
-      DB.exec("TRUNCATE TABLE optimized_images")
       DB.exec(<<~SQL)
         UPDATE posts
         SET baked_version = NULL
diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index dcd8c30..64c1a75 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -345,6 +345,7 @@ def migrate_to_s3
   end
 
   bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/"
+  public_directory = Rails.root.join("public").to_s
 
   opts = {
     region: ENV["DISCOURSE_S3_REGION"],
@@ -369,7 +370,7 @@ def migrate_to_s3
   print " - Listing local files"
 
   local_files = []
-  IO.popen("cd public && find uploads/#{db}/original -type f").each do |file|
+  IO.popen("cd #{public_directory} && find uploads/#{db}/original -type f").each do |file|
     local_files << file.chomp
     putc "." if local_files.size % 1000 == 0
   end
@@ -398,7 +399,7 @@ def migrate_to_s3
 
   skip_etag_verify = ENV["SKIP_ETAG_VERIFY"].present?
   local_files.each do |file|
-    path = File.join("public", file)
+    path = File.join(public_directory, file)
     name = File.basename(path)
     etag = Digest::MD5.file(path).hexdigest unless skip_etag_verify
     key = file[file.index(prefix)..-1]
@@ -534,7 +535,7 @@ def migrate_to_s3
         .where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'")
         .delete_all
 
-      puts "Flagging all posts containing oneboxes for rebake..."
+      puts "Flagging all posts containing lightboxes for rebake..."
 
       count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil)
       puts "#{count} posts were flagged for a rebake"

GitHub sha: f7a26486

I would be careful here because if current_db_name ever accidentally becomes nil in the future, we might end up nuking the entire uploads directory.

I’m quite sure the restore process will crash before it reaches that line when current_db_name is nil. And I’d actually like to migrate directly from the tmp directory instead of rsyncing the uploads to the public directory before the migration to S3. There’s no need for the copy step when S3 is used, but I didn’t want to change the rake task more than I had to right now.

It may crash now, but a future change could stop it from crashing before that line, and then we would end up nuking the public uploads directory. I just feel we should be extra careful here, since the process can't easily be tested.
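
For illustration, a hypothetical guard along these lines (not part of the commit, and assuming current_db_name is accessible inside the restorer the same way it is at the call site) would refuse to delete anything unless the path clearly points at a per-site uploads directory:

def remove_local_uploads(directory)
  # Hypothetical safety check: bail out before FileUtils.rm_rf can ever see
  # the bare "uploads/" path that results from a nil or blank site name.
  if current_db_name.blank? || !directory.end_with?("/uploads/#{current_db_name}")
    log "Refusing to remove unexpected uploads directory: #{directory}"
    return
  end

  log "Removing local uploads directory..."
  FileUtils.rm_rf(directory) if Dir[directory].present?
rescue => ex
  log "Something went wrong while removing the following uploads directory: #{directory}", ex
end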

I will try to follow up by making migrate_to_s3 upload directly from the tmp directory. That way we can skip copying files into the public/uploads directory and don’t need the risky delete.

REFACTOR: Restoring of backups and migration of uploads to S3. This removes the need to delete the public/uploads directory, as the files are now uploaded to S3 directly from the temp directory.
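
A rough sketch of what that refactor could look like, assuming a hypothetical source_directory parameter on the rake helper (the actual follow-up commit may be shaped differently):

def migrate_to_s3(source_directory: Rails.root.join("public").to_s)
  # Hypothetical variant of the existing task helper: read files from an
  # arbitrary directory instead of hard-coding the public directory, so the
  # restorer can point it straight at the extracted backup in tmp.
  db = RailsMultisite::ConnectionManagement.current_db

  local_files = []
  IO.popen("cd #{source_directory} && find uploads/#{db}/original -type f").each do |file|
    local_files << file.chomp
  end

  local_files.each do |file|
    path = File.join(source_directory, file)
    # ... compute the etag and upload `path` to S3 exactly as the task does today ...
  end
end

The restorer would then call something like migrate_to_s3(source_directory: tmp_uploads_path) with the temp directory it extracted the backup into (the name here is illustrative), and both the copy into public/uploads and the risky delete afterwards would go away.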