FIX: Backups should use relative paths for local uploads

FIX: Backups should use relative paths for local uploads

This also ensures that restoring a backup works when it was created with the wrong upload paths in the time between ab4c0a4970163506e2c72884ff2ba2d8845eb10c (shortly after v2.6.0.beta1) and this fix.

diff --git a/lib/backup_restore/backup_file_handler.rb b/lib/backup_restore/backup_file_handler.rb
index 7b26356..e0244b4 100644
--- a/lib/backup_restore/backup_file_handler.rb
+++ b/lib/backup_restore/backup_file_handler.rb
@@ -67,6 +67,8 @@ module BackupRestore
       log "Unzipping archive, this may take a while..."
       Discourse::Utils.execute_command(
         'tar', '--extract', '--gzip', '--file', @archive_path, '--directory', @tmp_directory,
+        '--transform', 's|var/www/discourse/public/uploads/|uploads/|',
+        # the transformation is a workaround for a bug which existed between v2.6.0.beta1 and v2.6.0.beta2
         failure_message: "Failed to decompress archive."
       )
     end
diff --git a/lib/backup_restore/backuper.rb b/lib/backup_restore/backuper.rb
index 11c1d81..8f00090 100644
--- a/lib/backup_restore/backuper.rb
+++ b/lib/backup_restore/backuper.rb
@@ -234,15 +234,17 @@ module BackupRestore
       log "Archiving uploads..."
 
       if has_local_uploads?
+        upload_directory = Discourse.store.upload_path
+
         if SiteSetting.include_thumbnails_in_backups
           exclude_optimized = ""
         else
-          optimized_path = File.join(local_uploads_directory, 'optimized')
+          optimized_path = File.join(upload_directory, 'optimized')
           exclude_optimized = "--exclude=#{optimized_path}"
         end
 
         Discourse::Utils.execute_command(
-          'tar', '--append', '--dereference', exclude_optimized, '--file', tar_filename, local_uploads_directory,
+          'tar', '--append', '--dereference', exclude_optimized, '--file', tar_filename, upload_directory,
           failure_message: "Failed to archive uploads.", success_status_codes: [0, 1],
           chdir: File.join(Rails.root, "public")
         )
diff --git a/spec/fixtures/backups/backup_with_wrong_upload_path.tar.gz b/spec/fixtures/backups/backup_with_wrong_upload_path.tar.gz
new file mode 100644
index 0000000..8fa8afc
Binary files /dev/null and b/spec/fixtures/backups/backup_with_wrong_upload_path.tar.gz differ
diff --git a/spec/lib/backup_restore/backup_file_handler_spec.rb b/spec/lib/backup_restore/backup_file_handler_spec.rb
index ba4cf29..c9a12aa 100644
--- a/spec/lib/backup_restore/backup_file_handler_spec.rb
+++ b/spec/lib/backup_restore/backup_file_handler_spec.rb
@@ -7,7 +7,7 @@ describe BackupRestore::BackupFileHandler do
   include_context "shared stuff"
 
   def expect_decompress_and_clean_up_to_work(backup_filename:, expected_dump_filename: "dump.sql",
-                                             require_metadata_file:, require_uploads:)
+                                             require_metadata_file:, require_uploads:, expected_upload_paths: nil)
 
     freeze_time(DateTime.parse('2019-12-24 14:31:48'))
 
@@ -31,8 +31,13 @@ describe BackupRestore::BackupFileHandler do
       expect(File.exist?(File.join(tmp_directory, "meta.json"))).to eq(require_metadata_file)
 
       if require_uploads
-        upload_filename = "uploads/default/original/3X/b/d/bd269860bb508aebcb6f08fe7289d5f117830383.png"
-        expect(File.exist?(File.join(tmp_directory, upload_filename))).to eq(true)
+        expected_upload_paths ||= ["uploads/default/original/3X/b/d/bd269860bb508aebcb6f08fe7289d5f117830383.png"]
+
+        expected_upload_paths.each do |upload_path|
+          absolute_upload_path = File.join(tmp_directory, upload_path)
+          expect(File.exist?(absolute_upload_path)).to eq(true), "expected file #{upload_path} does not exist"
+          yield(absolute_upload_path) if block_given?
+        end
       else
         expect(Dir.exist?(File.join(tmp_directory, "uploads"))).to eq(false)
       end
@@ -74,4 +79,26 @@ describe BackupRestore::BackupFileHandler do
       require_uploads: false
     )
   end
+
+  it "works with backup file which uses wrong upload path" do
+    expect_decompress_and_clean_up_to_work(
+      backup_filename: "backup_with_wrong_upload_path.tar.gz",
+      require_metadata_file: false,
+      require_uploads: true,
+      expected_upload_paths: [
+        "uploads/default/original/1X/both.txt",
+        "uploads/default/original/1X/only-uploads.txt",
+        "uploads/default/original/1X/only-var.txt"
+      ]
+    ) do |upload_path|
+      content = File.read(upload_path).chomp
+
+      case File.basename(upload_path)
+      when "both.txt", "only-var.txt"
+        expect(content).to eq("var")
+      when "only-uploads.txt"
+        expect(content).to eq("uploads")
+      end
+    end
+  end
 end

GitHub sha: f51ccea0

1 Like

This commit has been mentioned on Discourse Meta. There might be relevant details there:

https://meta.discourse.org/t/old-image-uploads-become-broken-images/47236/55

@gschlager bsdtar bundled with macOS Catalina doesn’t support --transform flag:

RuntimeError:
  lib/discourse.rb:92:in `exec': Failed to decompress archive.
  tar: Option --transform is not supported
  Usage:
    List:    tar -tf <archive-filename>
    Extract: tar -xf <archive-filename>
    Create:  tar -cf <archive-filename> [filenames...]
    Help:    tar --help
❯ tar --help
tar(bsdtar): manipulate archive files
First option must be a mode specifier:
  -c Create  -r Add/Replace  -t List  -u Update  -x Extract
Common Options:
  -b #  Use # 512-byte records per I/O block
  -f <filename>  Location of archive
  -v    Verbose
  -w    Interactive
Create: tar -c [options] [<file> | <dir> | @<archive> | -C <dir> ]
  <file>, <dir>  add these items to archive
  -z, -j, -J, --lzma  Compress archive with gzip/bzip2/xz/lzma
  --format {ustar|pax|cpio|shar}  Select archive format
  --exclude <pattern>  Skip files that match pattern
  -C <dir>  Change to <dir> before processing remaining files
  @<archive>  Add entries from <archive> to output
List: tar -t [options] [<patterns>]
  <patterns>  If specified, list only entries that match
Extract: tar -x [options] [<patterns>]
  <patterns>  If specified, extract only entries that match
  -k    Keep (don't overwrite) existing files
  -m    Don't restore modification times
  -O    Write entries to stdout, don't restore to disk
  -p    Restore permissions (including ACLs, owner, file flags)
bsdtar 3.3.2 - libarchive 3.3.2 zlib/1.2.11 liblzma/5.0.5 bz2lib/1.0.6

It does work with gnu-tar:

❯ brew install gnu-tar
❯ PATH="/usr/local/opt/gnu-tar/libexec/gnubin:$PATH" bundle exec rspec spec/lib/backup_restore/backup_file_handler_spec.rb

Randomized with seed 20132
....

Finished in 0.56633 seconds (files took 5.68 seconds to load)
4 examples, 0 failures

Randomized with seed 20132

Is it possible to fix the bug without this flag? If not, we’ll have to update macOS setup guide with that brew install command and a note about adding “gnubin” directory to bashrc.

1 Like

Looks like we can get it to work with bsdtar by using the -s flag. https://stackoverflow.com/a/45300269

1 Like

Yup, I can confirm this

'-s', '|var/www/discourse/public/uploads/|uploads/|',

does work with bsdtar:

❯ b rspec spec/lib/backup_restore/backup_file_handler_spec.rb

Randomized with seed 55698
....

Finished in 0.24695 seconds (files took 4.08 seconds to load)
4 examples, 0 failures

Randomized with seed 55698

It’s not supported by gnu-tar, so we’ll have to detect bsdtar or just catch the error and retry with the other argument.

1 Like

What’s the output of tar --version on macOS?

It’s already hiding at the end of --help output above :sweat_smile:

❯ tar --version
bsdtar 3.3.2 - libarchive 3.3.2 zlib/1.2.11 liblzma/5.0.5 bz2lib/1.0.6
1 Like