DEV: Change upload verified column to be integer (#10643)

DEV: Change upload verified column to be integer (#10643)

Per review https://review.discourse.org/t/dev-add-verified-to-uploads-and-fill-in-s3-inventory-10406/14180

Change the verified column for Upload to a verified_status integer column, to avoid having NULL as a weird implicit status.

diff --git a/app/models/upload.rb b/app/models/upload.rb
index 24eb7d9..1656fdb 100644
--- a/app/models/upload.rb
+++ b/app/models/upload.rb
@@ -3,6 +3,10 @@
 require "digest/sha1"
 
 class Upload < ActiveRecord::Base
+  self.ignored_columns = [
+    "verified" # TODO(2020-12-10): remove
+  ]
+
   include ActionView::Helpers::NumberHelper
   include HasUrl
 
@@ -51,6 +55,14 @@ class Upload < ActiveRecord::Base
 
   scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) }
 
+  def self.verification_statuses
+    @verification_statuses ||= Enum.new(
+      unchecked: 1,
+      verified: 2,
+      invalid_etag: 3
+    )
+  end
+
   def to_s
     self.url
   end
@@ -449,6 +461,7 @@ end
 #  access_control_post_id :bigint
 #  original_sha1          :string
 #  verified               :boolean
+#  verification_status    :integer          default(1), not null
 #
 # Indexes
 #
diff --git a/db/migrate/20200910051633_change_uploads_verified_to_integer.rb b/db/migrate/20200910051633_change_uploads_verified_to_integer.rb
new file mode 100644
index 0000000..67ef689
--- /dev/null
+++ b/db/migrate/20200910051633_change_uploads_verified_to_integer.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+class ChangeUploadsVerifiedToInteger < ActiveRecord::Migration[6.0]
+  def up
+    add_column :uploads, :verification_status, :integer, null: false, default: 1
+    Migration::ColumnDropper.mark_readonly(:uploads, :verified)
+
+    DB.exec(
+      <<~SQL
+      UPDATE uploads SET verification_status = CASE WHEN
+        verified THEN 2
+        WHEN NOT verified THEN 3
+        END
+      SQL
+    )
+  end
+
+  def down
+    remove_column :uploads, :verification_status
+  end
+end
diff --git a/db/migrate/20200911031738_add_index_to_uploads_verification_status.rb b/db/migrate/20200911031738_add_index_to_uploads_verification_status.rb
new file mode 100644
index 0000000..6d8637d
--- /dev/null
+++ b/db/migrate/20200911031738_add_index_to_uploads_verification_status.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+class AddIndexToUploadsVerificationStatus < ActiveRecord::Migration[6.0]
+  disable_ddl_transaction!
+
+  def up
+    execute <<~SQL
+      CREATE INDEX CONCURRENTLY IF NOT EXISTS
+      idx_uploads_on_verification_status ON uploads USING btree (verification_status)
+    SQL
+  end
+
+  def down
+    execute "DROP INDEX IF EXISTS idx_uploads_on_verification_status"
+  end
+end
diff --git a/lib/s3_inventory.rb b/lib/s3_inventory.rb
index 3b1c564..943ebca 100644
--- a/lib/s3_inventory.rb
+++ b/lib/s3_inventory.rb
@@ -71,22 +71,34 @@ class S3Inventory
               .joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url")
               .where("inventory2.etag IS NOT NULL").pluck(:id)
 
+            # marking as verified/not verified
             if model == Upload
-              # marking as verified/not verified
-              DB.exec(<<~SQL, inventory_date
+              sql_params = {
+                inventory_date: inventory_date,
+                unchecked: Upload.verification_statuses[:unchecked],
+                invalid_etag: Upload.verification_statuses[:invalid_etag],
+                verified: Upload.verification_statuses[:verified]
+              }
+              DB.exec(<<~SQL, sql_params)
                 UPDATE #{model.table_name}
-                SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END
+                SET verification_status = CASE WHEN table_name_alias.etag IS NULL
+                  THEN :invalid_etag
+                  ELSE :verified
+                END
                 FROM #{model.table_name} AS model_table
-                LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag
+                LEFT JOIN #{table_name} AS table_name_alias ON
+                  model_table.etag = table_name_alias.etag
                 WHERE model_table.id = #{model.table_name}.id
-                AND model_table.updated_at < ?
+                AND model_table.updated_at < :inventory_date
                 AND (
-                  model_table.verified IS NULL OR
-                  model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END
+                  model_table.verification_status = :unchecked OR
+                  model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
+                    THEN :invalid_etag
+                    ELSE :verified
+                  END
                 )
                 AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
-                SQL
-              )
+              SQL
             end
 
             if (missing_count = missing_uploads.count) > 0
diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index 89e3892..2587009 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -1001,7 +1001,7 @@ def analyze_missing_s3
     SELECT post_id, url, sha1, extension, uploads.id
     FROM post_uploads pu
     RIGHT JOIN uploads on uploads.id = pu.upload_id
-    WHERE NOT verified
+    WHERE verification_status = :invalid_etag
     ORDER BY created_at
   SQL
 
@@ -1009,7 +1009,7 @@ def analyze_missing_s3
   other = []
   all = []
 
-  DB.query(sql).each do |r|
+  DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r|
     all << r
     if r.post_id
       lookup[r.post_id] ||= []
@@ -1029,7 +1029,7 @@ def analyze_missing_s3
     puts
   end
 
-  missing_uploads = Upload.where(verified: false)
+  missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
   puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
   puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads"
   puts "Other missing uploads count: #{other.count}"
@@ -1067,7 +1067,9 @@ def analyze_missing_s3
 end
 
 def delete_missing_s3
-  missing = Upload.where(verified: false).order(:created_at)
+  missing = Upload.where(
+    verification_status: Upload.verification_statuses[:invalid_etag]
+  ).order(:created_at)
   count = missing.count
   if count > 0
     puts "The following uploads will be deleted from the database"
@@ -1110,7 +1112,9 @@ def fix_missing_s3
   Jobs.run_immediately!
 
   puts "Attempting to download missing uploads and recreate"
-  ids = Upload.where(verified: false).pluck(:id)
+  ids = Upload.where(
+    verification_status: Upload.verification_statuses[:invalid_etag]
+  ).pluck(:id)
   ids.each do |id|
     upload = Upload.find(id)
 
@@ -1165,11 +1169,11 @@ def fix_missing_s3
     SELECT post_id
     FROM post_uploads pu
     JOIN uploads on uploads.id = pu.upload_id
-    WHERE NOT verified
+    WHERE verification_status = :invalid_etag
     ORDER BY post_id DESC
   SQL
 
-  DB.query_single(sql).each do |post_id|
+  DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id|
     post = Post.find_by(id: post_id)
     if post
       post.rebake!
diff --git a/spec/components/s3_inventory_spec.rb b/spec/components/s3_inventory_spec.rb
index 2f4122d..9143aff 100644
--- a/spec/components/s3_inventory_spec.rb
+++ b/spec/components/s3_inventory_spec.rb
@@ -107,15 +107,15 @@ describe "S3Inventory" do
     end
 
     it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
-      expect(Upload.where(verified: nil).count).to eq(12)
+      expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(12)
       output = capture_stdout do
         inventory.backfill_etags_and_list_missing
       end
 
-      verified = Upload.pluck(:verified)
-      expect(Upload.where(verified: true).count).to eq(3)
-      expect(Upload.where(verified: false).count).to eq(2)
-      expect(Upload.where(verified: nil).count).to eq(7)

[... diff too long, it was truncated ...]

GitHub sha: 80268357

This commit appears in #10643 which was approved by CvX and tgxworld. It was merged by martin.