FIX: Capture S3 metadata when calling create_multipart (#14161)

FIX: Capture S3 metadata when calling create_multipart (#14161)

The generate_presigned_put endpoint for direct external uploads (such as the one for the uppy-image-uploader) records allowed S3 metadata values on the uploaded object. We use this to store the sha1-checksum generated by the UppyChecksum plugin, for later comparison in ExternalUploadManager.

However, we were not doing this for the create_multipart endpoint, so the checksum was never captured and compared correctly.

Also includes a fix to make sure UppyChecksum is the last preprocessor to run. It is important that the UppyChecksum preprocessor is the last one to be added; the preprocessors are run in order and since other preprocessors may modify the file (e.g. the UppyMediaOptimization one), we need to checksum once we are sure the file data has “settled”.

diff --git a/app/assets/javascripts/discourse/app/mixins/composer-upload-uppy.js b/app/assets/javascripts/discourse/app/mixins/composer-upload-uppy.js
index 824a9a6..815c8e4 100644
--- a/app/assets/javascripts/discourse/app/mixins/composer-upload-uppy.js
+++ b/app/assets/javascripts/discourse/app/mixins/composer-upload-uppy.js
@@ -133,16 +133,6 @@ export default Mixin.create({
       },
     });
 
-    this._uppyInstance.use(DropTarget, { target: this.element });
-    this._uppyInstance.use(UppyChecksum, { capabilities: this.capabilities });
-
-    // TODO (martin) Need a more automatic way to do this for preprocessor
-    // plugins like UppyChecksum and UppyMediaOptimization so people don't
-    // have to remember to do this, also want to wrap this.uppy.emit in those
-    // classes so people don't have to remember to pass through the plugin class
-    // name for the preprocess-X events.
-    this._trackPreProcessorStatus(UppyChecksum);
-
     // hidden setting like enable_experimental_image_uploader
     if (this.siteSettings.enable_direct_s3_uploads) {
       this._useS3MultipartUploads();
@@ -233,6 +223,20 @@ export default Mixin.create({
     });
 
     this._setupPreprocessing();
+
+    // It is important that the UppyChecksum preprocessor is the last one to
+    // be added; the preprocessors are run in order and since other preprocessors
+    // may modify the file (e.g. the UppyMediaOptimization one), we need to
+    // checksum once we are sure the file data has "settled".
+    this._uppyInstance.use(UppyChecksum, { capabilities: this.capabilities });
+    this._uppyInstance.use(DropTarget, { target: this.element });
+
+    // TODO (martin) Need a more automatic way to do this for preprocessor
+    // plugins like UppyChecksum and UppyMediaOptimization so people don't
+    // have to remember to do this, also want to wrap this.uppy.emit in those
+    // classes so people don't have to remember to pass through the plugin class
+    // name for the preprocess-X events.
+    this._trackPreProcessorStatus(UppyChecksum);
   },
 
   _handleUploadError(file, error, response) {
@@ -373,19 +377,30 @@ export default Mixin.create({
       limit: 10,
 
       createMultipartUpload(file) {
+        const data = {
+          file_name: file.name,
+          file_size: file.size,
+          upload_type: file.meta.upload_type,
+          metadata: file.meta,
+        };
+
+        // the sha1 checksum is set by the UppyChecksum plugin, except
+        // for in cases where the browser does not support the required
+        // crypto mechanisms or an error occurs. it is an additional layer
+        // of security, and not required.
+        if (file.meta.sha1_checksum) {
+          data.metadata = { "sha1-checksum": file.meta.sha1_checksum };
+        }
+
         return ajax("/uploads/create-multipart.json", {
           type: "POST",
-          data: {
-            file_name: file.name,
-            file_size: file.size,
-            upload_type: file.meta.upload_type,
-          },
+          data,
           // uppy is inconsistent, an error here fires the upload-error event
-        }).then((data) => {
-          file.meta.unique_identifier = data.unique_identifier;
+        }).then((responseData) => {
+          file.meta.unique_identifier = responseData.unique_identifier;
           return {
-            uploadId: data.external_upload_identifier,
-            key: data.key,
+            uploadId: responseData.external_upload_identifier,
+            key: responseData.key,
           };
         });
       },
diff --git a/app/controllers/uploads_controller.rb b/app/controllers/uploads_controller.rb
index 0a5c0bb..c1c124e 100644
--- a/app/controllers/uploads_controller.rb
+++ b/app/controllers/uploads_controller.rb
@@ -224,19 +224,7 @@ class UploadsController < ApplicationController
       )
     end
 
-    # don't want people posting arbitrary S3 metadata so we just take the
-    # one we need. all of these will be converted to x-amz-meta- metadata
-    # fields in S3 so it's best to use dashes in the names for consistency
-    #
-    # this metadata is baked into the presigned url and is not altered when
-    # sending the PUT from the clientside to the presigned url
-    metadata = if params[:metadata].present?
-      meta = {}
-      if params[:metadata]["sha1-checksum"].present?
-        meta["sha1-checksum"] = params[:metadata]["sha1-checksum"]
-      end
-      meta
-    end
+    metadata = parse_allowed_metadata(params[:metadata])
 
     url = Discourse.store.signed_url_for_temporary_upload(
       file_name, metadata: metadata
@@ -313,9 +301,11 @@ class UploadsController < ApplicationController
       )
     end
 
+    metadata = parse_allowed_metadata(params[:metadata])
+
     begin
       multipart_upload = Discourse.store.create_multipart(
-        file_name, content_type
+        file_name, content_type, metadata: metadata
       )
     rescue Aws::S3::Errors::ServiceError => err
       debug_upload_error(err, "upload.create_mutlipart_failure")
@@ -579,4 +569,15 @@ class UploadsController < ApplicationController
     return if !SiteSetting.enable_upload_debug_mode
     Discourse.warn_exception(err, message: I18n.t(translation_key, translation_params))
   end
+
+  # don't want people posting arbitrary S3 metadata so we just take the
+  # one we need. all of these will be converted to x-amz-meta- metadata
+  # fields in S3 so it's best to use dashes in the names for consistency
+  #
+  # this metadata is baked into the presigned url and is not altered when
+  # sending the PUT from the clientside to the presigned url
+  def parse_allowed_metadata(metadata)
+    return if metadata.blank?
+    metadata.permit("sha1-checksum").to_h
+  end
 end
diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb
index 53c54e6..82fadc4 100644
--- a/lib/file_store/s3_store.rb
+++ b/lib/file_store/s3_store.rb
@@ -319,13 +319,14 @@ module FileStore
       )
     end
 
-    def create_multipart(file_name, content_type)
+    def create_multipart(file_name, content_type, metadata: {})
       key = temporary_upload_path(file_name)
       response = s3_helper.s3_client.create_multipart_upload(
         acl: "private",
         bucket: s3_bucket_name,
         key: key,
-        content_type: content_type
+        content_type: content_type,
+        metadata: metadata
       )
       { upload_id: response.upload_id, key: key }
     end
diff --git a/spec/requests/uploads_controller_spec.rb b/spec/requests/uploads_controller_spec.rb
index c50711a..5463faa 100644
--- a/spec/requests/uploads_controller_spec.rb
+++ b/spec/requests/uploads_controller_spec.rb
@@ -802,8 +802,6 @@ describe UploadsController do
         expect(response.status).to eq(400)
         post "/uploads/create-multipart.json", params: { upload_type: "composer" }
         expect(response.status).to eq(400)
-        post "/uploads/create-multipart.json", params: { content_type: "image/jpeg" }
-        expect(response.status).to eq(400)
       end
 
       it "returns 422 when the create request errors" do
@@ -813,7 +811,6 @@ describe UploadsController do
             file_name: "test.png",
             file_size: 1024,
             upload_type: "composer",
-            content_type: "image/png"
           }
         }
         expect(response.status).to eq(422)
@@ -826,7 +823,6 @@ describe UploadsController do
             file_name: "test.zip",
             file_size: 9999999,
             upload_type: "composer",
-            content_type: "application/zip"
           }
         }
         expect(response.status).to eq(422)
@@ -855,7 +851,6 @@ describe UploadsController do
             file_name: "test.png",
             file_size: 1024,
             upload_type: "composer",
-            content_type: "image/png"
           }
         }
 
@@ -878,6 +873,27 @@ describe UploadsController do
         expect(result["key"]).to eq(external_upload_stub.last.key)
       end
 

[... diff too long, it was truncated ...]

GitHub sha: 99ec8eb6df87d11c4641e118c4cecd19cf4f1f12

This commit appears in #14161 which was approved by eviltrout. It was merged by martin.