REFACTOR: Restoring of backups and migration of uploads to S3

REFACTOR: Restoring of backups and migration of uploads to S3

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d3ad739428..ce246dc12e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,5 +1,5 @@
 name: CI
-   
+
 on:
   push:
     branches:
@@ -7,7 +7,7 @@ on:
   pull_request:
     branches-ignore:
       - 'tests-passed'
-    
+
 jobs:
   build:
     name: "${{ matrix.target }}-${{ matrix.build_types }}"
@@ -38,7 +38,7 @@ jobs:
     services:
       postgres:
         image: postgres:${{ matrix.postgres }}
-        ports: 
+        ports:
           - 5432:5432
         env:
           POSTGRES_USER: discourse
@@ -88,14 +88,14 @@ jobs:
           key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }}
           restore-keys: |
             ${{ runner.os }}-gem-
-      
+
       - name: Setup gems
         run: bundle install --without development --deployment --jobs 4 --retry 3
 
       - name: Get yarn cache directory
         id: yarn-cache-dir
         run: echo "::set-output name=dir::$(yarn cache dir)"
-    
+
       - name: Yarn cache
         uses: actions/cache@v1
         id: yarn-cache
@@ -113,7 +113,7 @@ jobs:
         run: bin/rake plugin:install_all_official
 
       - name: Create database
-        if: env.BUILD_TYPE != 'LINT'    
+        if: env.BUILD_TYPE != 'LINT'
         run: bin/rake db:create && bin/rake db:migrate
 
       - name: Create parallel databases
@@ -123,7 +123,7 @@ jobs:
       - name: Rubocop
         if: env.BUILD_TYPE == 'LINT'
         run: bundle exec rubocop .
-      
+
       - name: ESLint
         if: env.BUILD_TYPE == 'LINT'
         run: yarn eslint app/assets/javascripts test/javascripts && yarn eslint --ext .es6 app/assets/javascripts test/javascripts plugins
@@ -133,7 +133,7 @@ jobs:
         run: |
           yarn prettier -v
           yarn prettier --list-different "app/assets/stylesheets/**/*.scss" "app/assets/javascripts/**/*.es6" "test/javascripts/**/*.es6" "plugins/**/*.scss" "plugins/**/*.es6"
-        
+
       - name: Core RSpec
         if: env.BUILD_TYPE == 'BACKEND' && env.TARGET == 'CORE'
         run: bin/turbo_rspec && bin/rake plugin:spec
@@ -146,12 +146,12 @@ jobs:
         if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE'
         run: bundle exec rake qunit:test['1200000']
         timeout-minutes: 30
-      
+
       - name: Wizard QUnit
         if: env.BUILD_TYPE == 'FRONTEND' && env.TARGET == 'CORE'
         run: bundle exec rake qunit:test['1200000','/wizard/qunit']
         timeout-minutes: 30
-          
+
       - name: Plugin QUnit # Tests core plugins in TARGET=CORE, and all plugins in TARGET=PLUGINS
         if: env.BUILD_TYPE == 'FRONTEND'
         run: bundle exec rake plugin:qunit
diff --git a/.gitignore b/.gitignore
index 5bc058976e..59ad3c0b6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ config/discourse.conf
 # Ignore the default SQLite database and db dumps
 *.sql
 *.sql.gz
+!/spec/fixtures/**/*.sql
 /db/*.sqlite3
 /db/structure.sql
 /db/schema.rb
diff --git a/lib/backup_restore.rb b/lib/backup_restore.rb
index 3b7e189d07..6259044dcb 100644
--- a/lib/backup_restore.rb
+++ b/lib/backup_restore.rb
@@ -4,10 +4,8 @@ module BackupRestore
 
   class OperationRunningError < RuntimeError; end
 
-  VERSION_PREFIX = "v".freeze
-  DUMP_FILE = "dump.sql.gz".freeze
-  OLD_DUMP_FILE = "dump.sql".freeze
-  METADATA_FILE = "meta.json"
+  VERSION_PREFIX = "v"
+  DUMP_FILE = "dump.sql.gz"
   LOGS_CHANNEL = "/admin/backups/logs"
 
   def self.backup!(user_id, opts = {})
@@ -19,7 +17,16 @@ module BackupRestore
   end
 
   def self.restore!(user_id, opts = {})
-    start! BackupRestore::Restorer.new(user_id, opts)
+    restorer = BackupRestore::Restorer.new(
+      user_id: user_id,
+      filename: opts[:filename],
+      factory: BackupRestore::Factory.new(
+        user_id: user_id,
+        client_id: opts[:client_id]
+      )
+    )
+
+    start! restorer
   end
 
   def self.rollback!
@@ -75,16 +82,18 @@ module BackupRestore
   end
 
   def self.move_tables_between_schemas(source, destination)
-    DB.exec(move_tables_between_schemas_sql(source, destination))
+    ActiveRecord::Base.transaction do
+      DB.exec(move_tables_between_schemas_sql(source, destination))
+    end
   end
 
   def self.move_tables_between_schemas_sql(source, destination)
-    <<-SQL
+    <<~SQL
       DO $$DECLARE row record;
       BEGIN
         -- create <destination> schema if it does not exists already
         -- NOTE: DROP & CREATE SCHEMA is easier, but we don't want to drop the public schema
-        -- ortherwise extensions (like hstore & pg_trgm) won't work anymore...
+        -- otherwise extensions (like hstore & pg_trgm) won't work anymore...
         CREATE SCHEMA IF NOT EXISTS #{destination};
         -- move all <source> tables to <destination> schema
         FOR row IN SELECT tablename FROM pg_tables WHERE schemaname = '#{source}'
@@ -108,11 +117,17 @@ module BackupRestore
     config = ActiveRecord::Base.connection_pool.spec.config
     config = config.with_indifferent_access
 
+    # credentials for PostgreSQL in CI environment
+    if Rails.env.test?
+      username = ENV["PGUSER"]
+      password = ENV["PGPASSWORD"]
+    end
+
     DatabaseConfiguration.new(
       config["backup_host"] || config["host"],
       config["backup_port"] || config["port"],
-      config["username"] || ENV["USER"] || "postgres",
-      config["password"],
+      config["username"] || username || ENV["USER"] || "postgres",
+      config["password"] || password,
       config["database"]
     )
   end
diff --git a/lib/backup_restore/backup_file_handler.rb b/lib/backup_restore/backup_file_handler.rb
new file mode 100644
index 0000000000..c5e3702488
--- /dev/null
+++ b/lib/backup_restore/backup_file_handler.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+module BackupRestore
+  class BackupFileHandler
+    OLD_DUMP_FILENAME = "dump.sql"
+
+    delegate :log, to: :@logger, private: true
+
+    def initialize(logger, filename, current_db, root_tmp_directory = Rails.root)
+      @logger = logger
+      @filename = filename
+      @current_db = current_db
+      @root_tmp_directory = root_tmp_directory
+      @is_archive = !(@filename =~ /\.sql\.gz$/)
+    end
+
+    def decompress
+      create_tmp_directory
+      @archive_path = File.join(@tmp_directory, @filename)
+
+      copy_archive_to_tmp_directory
+      decompress_archive
+      extract_db_dump
+
+      [@tmp_directory, @db_dump_path]
+    end
+
+    def clean_up
+      return if @tmp_directory.blank?
+
+      log "Removing tmp '#{@tmp_directory}' directory..."
+      FileUtils.rm_rf(@tmp_directory) if Dir[@tmp_directory].present?
+    rescue => ex
+      log "Something went wrong while removing the following tmp directory: #{@tmp_directory}", ex
+    end
+
+    protected
+
+    def create_tmp_directory
+      timestamp = Time.zone.now.strftime("%Y-%m-%d-%H%M%S")
+      @tmp_directory = File.join(@root_tmp_directory, "tmp", "restores", @current_db, timestamp)
+      ensure_directory_exists(@tmp_directory)
+    end
+
+    def ensure_directory_exists(directory)
+      log "Making sure #{directory} exists..."
+      FileUtils.mkdir_p(directory)
+    end
+
+    def copy_archive_to_tmp_directory
+      store = BackupRestore::BackupStore.create
+
+      if store.remote?
+        log "Downloading archive to tmp directory..."
+        failure_message = "Failed to download archive to tmp directory."
+      else
+        log "Copying archive to tmp directory..."
+        failure_message = "Failed to copy archive to tmp directory."
+      end
+
+      store.download_file(@filename, @archive_path, failure_message)
+    end
+
+    def decompress_archive
+      return if !@is_archive
+
+      log "Unzipping archive, this may take a while..."
+      pipeline = Compression::Pipeline.new([Compression::Tar.new, Compression::Gzip.new])
+      unzipped_path = pipeline.decompress(@tmp_directory, @archive_path, available_size)
+      pipeline.strip_directory(unzipped_path, @tmp_directory)
+    end
+

[... diff too long, it was truncated ...]

GitHub sha: e474cda3

1 Like

Have you looked at this? It’s nice to let the stdlib do this kind of thing instead of us.

1 Like

This is a huge improvement. Thanks :clap:

2 Likes

Yes, I’m aware of this function, but I didn’t want to change all the existing functionality at once. There’s still room for improvement and with all the specs it’s a lot easier to change things. I’ll put it on my list of things to change.