Update rake task to backfill etags from s3 inventory

Update rake task to backfill etags from s3 inventory

diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb
index d4bb664..e51ea23 100644
--- a/lib/file_store/s3_store.rb
+++ b/lib/file_store/s3_store.rb
@@ -123,11 +123,11 @@ module FileStore
       SiteSetting.Upload.s3_upload_bucket.downcase
     end
 
-    def list_missing_uploads(skip_optimized: false)
+    def list_missing_uploads(skip_optimized: false, backfill_etags: false)
       if SiteSetting.enable_s3_inventory
         require 's3_inventory'
-        S3Inventory.new(s3_helper, :upload).list_missing
-        S3Inventory.new(s3_helper, :optimized).list_missing unless skip_optimized
+        S3Inventory.new(s3_helper, :upload).list_missing(backfill_etags: backfill_etags)
+        S3Inventory.new(s3_helper, :optimized).list_missing(backfill_etags: backfill_etags) unless skip_optimized
       else
         list_missing(Upload, "original/")
         list_missing(OptimizedImage, "optimized/") unless skip_optimized
diff --git a/lib/s3_inventory.rb b/lib/s3_inventory.rb
index 19afad5..3d9a559 100644
--- a/lib/s3_inventory.rb
+++ b/lib/s3_inventory.rb
@@ -24,7 +24,7 @@ class S3Inventory
     end
   end
 
-  def list_missing
+  def list_missing(backfill_etags: false)
     if files.blank?
       error("Failed to list inventory from S3")
       return
@@ -46,6 +46,13 @@ class S3Inventory
           end
         end
 
+        if backfill_etags
+          uploads = model.where(etag: nil).joins("LEFT JOIN #{table_name} ON #{model.table_name}.url ILIKE '%' || #{table_name}.key")
+          uploads.select(:id, :"#{table_name}.etag").find_each do |upload|
+            model.where(id: upload.id).update_all(etag: upload.etag)
+          end
+        end
+
         uploads = (model == Upload) ? model.where("created_at < ?", last_modified) : model
         missing_uploads = uploads.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag").where("#{table_name}.etag is NULL")
 
diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index 40b33e8..ed11824 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -482,16 +482,16 @@ end
 # list all missing uploads and optimized images
 task "uploads:missing" => :environment do
   if ENV["RAILS_DB"]
-    list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
+    list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'], backfill_etags: ENV['BACKFILL_ETAGS'])
   else
     RailsMultisite::ConnectionManagement.each_connection do |db|
-      list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
+      list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'], backfill_etags: ENV['BACKFILL_ETAGS'])
     end
   end
 end
 
-def list_missing_uploads(skip_optimized: false)
-  Discourse.store.list_missing_uploads(skip_optimized: skip_optimized)
+def list_missing_uploads(skip_optimized: false, backfill_etags: false)
+  Discourse.store.list_missing_uploads(skip_optimized: skip_optimized, backfill_etags: backfill_etags)
 end
 
 ################################################################################
diff --git a/spec/components/s3_inventory_spec.rb b/spec/components/s3_inventory_spec.rb
index 1156d5d..54b6585 100644
--- a/spec/components/s3_inventory_spec.rb
+++ b/spec/components/s3_inventory_spec.rb
@@ -74,4 +74,15 @@ describe "S3Inventory" do
 
     expect(output).to eq("Downloading inventory file 'Key' to tmp directory...\n#{upload.url}\n1 of 4 uploads are missing\n")
   end
+
+  it "should backfill etags to uploads table correctly" do
+    Fabricate(:upload, url: "//bucket.amazonaws.com/original/0184537a4f419224404d013414e913a4f56018f2.jpg", created_at: 2.days.ago)
+
+    inventory.expects(:decompress_inventory_files)
+    inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).at_least(1)
+
+    output = capture_stdout do
+      expect { inventory.list_missing(backfill_etags: true) }.to change { Upload.where(etag: nil).count }.by(-1)
+    end
+  end
 end

GitHub sha: 7b593101

@vinothkannans this commit is broken for local_store, whose `list_missing_uploads` does not accept the new `backfill_etags` keyword:

ArgumentError: unknown keyword: backfill_etags
/var/www/discourse/lib/file_store/local_store.rb:96:in `list_missing_uploads'
2 Likes

Thanks @eviltrout. This has been fixed in a follow-up commit.

1 Like