PERF: new 'migrate_to_s3' rake task

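PERF: new `migrate_to_s3` rake task. Besides the new `uploads:migrate_to_s3` logic (which adds a `DRY_RUN` mode), this commit adds `DbHelper.regexp_replace`, converts `DbHelper.find` to keyword arguments, and renames the `exclude_tables:` keyword of `DbHelper.remap` to `excluded_tables:`.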

diff --git a/app/models/site_setting.rb b/app/models/site_setting.rb
index 9225216..ee20d17 100644
--- a/app/models/site_setting.rb
+++ b/app/models/site_setting.rb
@@ -153,7 +153,7 @@ class SiteSetting < ActiveRecord::Base
 
       # cf. http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
       if SiteSetting.s3_endpoint == "https://s3.amazonaws.com"
-        if SiteSetting.Upload.s3_region == 'cn-north-1' || SiteSetting.Upload.s3_region == 'cn-northwest-1'
+        if SiteSetting.Upload.s3_region.start_with?("cn-")
           "//#{bucket}.s3.#{SiteSetting.Upload.s3_region}.amazonaws.com.cn"
         else
           "//#{bucket}.s3.dualstack.#{SiteSetting.Upload.s3_region}.amazonaws.com"
diff --git a/lib/db_helper.rb b/lib/db_helper.rb
index 04c9f8d..19cdfe5 100644
--- a/lib/db_helper.rb
+++ b/lib/db_helper.rb
@@ -1,60 +1,91 @@
 class DbHelper
 
-  REMAP_SQL ||= "
+  REMAP_SQL ||= <<~SQL
     SELECT table_name, column_name
       FROM information_schema.columns
      WHERE table_schema = 'public'
        AND is_updatable = 'YES'
        AND (data_type LIKE 'char%' OR data_type LIKE 'text%')
-  ORDER BY table_name, column_name"
+  ORDER BY table_name, column_name
+  SQL
 
-  def self.remap(from, to, anchor_left: false, anchor_right: false, exclude_tables: [])
-    results = DB.query(REMAP_SQL).to_a
+  def self.remap(from, to, anchor_left: false, anchor_right: false, excluded_tables: [])
     like = "#{anchor_left ? '' : "%"}#{from}#{anchor_right ? '' : "%"}"
+    text_columns = Hash.new { |h, k| h[k] = [] }
 
-    remappable_columns = {}
-
-    results.each do |result|
-      remappable_columns[result.table_name] ||= []
-      remappable_columns[result.table_name] << result.column_name
+    DB.query(REMAP_SQL).each do |r|
+      text_columns[r.table_name] << r.column_name
     end
 
-    exclude_tables = exclude_tables.map(&:to_s)
+    text_columns.each do |table, columns|
+      next if excluded_tables.include?(table)
 
-    remappable_columns.each do |table_name, column_names|
-      next if exclude_tables.include?(table_name)
-      set_clause = column_names.map do |column_name|
-        "#{column_name} = REPLACE(#{column_name}, :from, :to)"
+      set = columns.map do |column|
+        "#{column} = REPLACE(#{column}, :from, :to)"
       end.join(", ")
 
-      where_clause = column_names.map do |column_name|
-        "#{column_name} LIKE :like"
+      where = columns.map do |column|
+        "#{column} IS NOT NULL AND #{column} LIKE :like"
       end.join(" OR ")
 
       DB.exec(<<~SQL, from: from, to: to, like: like)
-        UPDATE #{table_name}
-        SET #{set_clause}
-        WHERE #{where_clause}
+        UPDATE #{table}
+           SET #{set}
+         WHERE #{where}
+      SQL
+    end
+
+    SiteSetting.refresh!
+  end
+
+  # Regex-replaces `pattern` with `replacement` in every REMAP_SQL column of each non-excluded table.
+  # `match` is the SQL operator that pre-filters rows; it is interpolated, so never pass untrusted input.
+  def self.regexp_replace(pattern, replacement, flags: "gi", match: "~*", excluded_tables: [])
+    text_columns = Hash.new { |h, k| h[k] = [] }
+
+    DB.query(REMAP_SQL).each do |r|
+      text_columns[r.table_name] << r.column_name
+    end
+
+    text_columns.each do |table, columns|
+      next if excluded_tables.include?(table)
+
+      set = columns.map do |column|
+        "#{column} = REGEXP_REPLACE(#{column}, :pattern, :replacement, :flags)"
+      end.join(", ")
+
+      where = columns.map do |column|
+        "#{column} IS NOT NULL AND #{column} #{match} :pattern"
+      end.join(" OR ")
+
+      DB.exec(<<~SQL, pattern: pattern, replacement: replacement, flags: flags)
+        UPDATE #{table}
+           SET #{set}
+         WHERE #{where}
       SQL
     end
 
     SiteSetting.refresh!
   end
 
-  def self.find(needle, anchor_left = false, anchor_right = false)
-    connection = ActiveRecord::Base.connection.raw_connection
-    text_columns = connection.async_exec(REMAP_SQL).to_a
-    args = ["#{anchor_left ? '' : "%"}#{needle}#{anchor_right ? '' : "%"}"]
+  def self.find(needle, anchor_left: false, anchor_right: false, excluded_tables: [])
     found = {}
+    like = "#{anchor_left ? '' : "%"}#{needle}#{anchor_right ? '' : "%"}"
+
+    DB.query(REMAP_SQL).each do |r|
+      next if excluded_tables.include?(r.table_name)
 
-    text_columns.each do |rc|
-      table_name = rc["table_name"]
-      column_name = rc["column_name"]
-      result = connection.async_exec("SELECT #{column_name} FROM #{table_name} WHERE #{column_name} LIKE $1", args) rescue nil
-      if result&.ntuples > 0
-        found["#{table_name}.#{column_name}"] = result.map { |r| r[column_name] }
+      rows = DB.query(<<~SQL, like: like)
+        SELECT #{r.column_name}
+          FROM #{r.table_name}
+         WHERE #{r.column_name} LIKE :like
+      SQL
+
+      if rows.size > 0
+        found["#{r.table_name}.#{r.column_name}"] = rows.map { |row| row.send(r.column_name) }
       end
     end
+
     found
   end
 
diff --git a/lib/file_store/base_store.rb b/lib/file_store/base_store.rb
index bdc4856..9043205 100644
--- a/lib/file_store/base_store.rb
+++ b/lib/file_store/base_store.rb
@@ -98,7 +98,7 @@ module FileStore
 
     def get_path_for(type, id, sha, extension)
       depth = get_depth_for(id)
-      tree = File.join(*sha[0, depth].split(""), "")
+      tree = File.join(*sha[0, depth].chars, "")
       "#{type}/#{depth + 1}X/#{tree}#{sha}#{extension}"
     end
 
@@ -107,8 +107,7 @@ module FileStore
         if upload.extension
           ".#{upload.extension}"
         else
-          # Maintain backward compatibility before Jobs::MigrateUploadExtensions
-          # runs
+          # Maintain backward compatibility before Jobs::MigrateUploadExtensions runs
           File.extname(upload.original_filename)
         end
 
diff --git a/lib/s3_helper.rb b/lib/s3_helper.rb
index c3dad06..708c232 100644
--- a/lib/s3_helper.rb
+++ b/lib/s3_helper.rb
@@ -24,8 +24,7 @@ class S3Helper
 
   def upload(file, path, options = {})
     path = get_path_for_s3_upload(path)
-    obj = s3_bucket.object(path)
-    obj.upload_file(file, options)
+    s3_bucket.object(path).upload_file(file, options)
     path
   end
 
@@ -93,7 +92,6 @@ class S3Helper
   end
 
   def update_lifecycle(id, days, prefix: nil, tag: nil)
-
     filter = {}
 
     if prefix
@@ -171,14 +169,15 @@ class S3Helper
   end
 
   def object(path)
-    path = get_path_for_s3_upload(path)
-    s3_bucket.object(path)
+    s3_bucket.object(get_path_for_s3_upload(path))
   end
 
   def self.s3_options(obj)
-    opts = { region: obj.s3_region,
-             endpoint: SiteSetting.s3_endpoint,
-             force_path_style: SiteSetting.s3_force_path_style }
+    opts = {
+      region: obj.s3_region,
+      endpoint: SiteSetting.s3_endpoint,
+      force_path_style: SiteSetting.s3_force_path_style
+    }
 
     unless obj.s3_use_iam_profile
       opts[:access_key_id] = obj.s3_access_key_id
diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake
index cf17515..b722d1c 100644
--- a/lib/tasks/uploads.rake
+++ b/lib/tasks/uploads.rake
@@ -203,9 +203,6 @@ end
 ################################################################################
 
 task "uploads:migrate_to_s3" => :environment do
-  require "file_store/s3_store"
-  require "file_store/local_store"
-
   ENV["RAILS_DB"] ? migrate_to_s3 : migrate_to_s3_all_sites
 end
 
@@ -214,93 +211,180 @@ def migrate_to_s3_all_sites
 end
 
 def migrate_to_s3
-  # make sure s3 is enabled
-  if !SiteSetting.Upload.enable_s3_uploads
-    puts "You must enable s3 uploads before running that task"
-    return
+  db = RailsMultisite::ConnectionManagement.current_db
+
+  dry_run = !!ENV["DRY_RUN"]
+
+  puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run
+  puts "Migrating uploads to S3 for '#{db}'..."
+
+  if Upload.where("url NOT LIKE '//%' AND url NOT LIKE '/uploads/#{db}/original/_X/%'").exists?
+    puts <<~TEXT
+      Some uploads were not migrated to the new scheme. Please run these commands in the rails console
+
+      SiteSetting.migrate_to_new_scheme = true

[... diff too long, it was truncated ...]

GitHub
sha: 5381096b

This commit has been mentioned on Discourse Meta. There might be relevant details there:

This commit has been mentioned on Discourse Meta. There might be relevant details there: