FIX: S3 store has_been_uploaded? was not taking into account s3 bucket path (#9810)

FIX: S3 store has_been_uploaded? was not taking into account s3 bucket path (#9810)

In some cases, between Discourse forums the hostname of a URL could match if they are hosting S3 files on the same bucket but the S3 bucket path might not. So e.g. https://testbucket.somesite.com/testpath/some/file/url.png vs https://testbucket.somesite.com/prodpath/some/file/url.png. So has_been_uploaded? was returning true for the second URL, even though it may have been uploaded on a different Discourse forum.

This is a very rare case but must be accounted for, because this impacts UrlHelper.is_local which mistakenly thinks the file has already been downloaded and thus allows the URL to be cooked, where we want to return the full URL to be downloaded using PullHotlinkedImages.

diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb
index 6a26ebd..46dece4 100644
--- a/lib/file_store/s3_store.rb
+++ b/lib/file_store/s3_store.rb
@@ -79,12 +79,30 @@ module FileStore
     def has_been_uploaded?(url)
       return false if url.blank?
 
+      parsed_url = URI.parse(url)
       base_hostname = URI.parse(absolute_base_url).hostname
-      return true if url[base_hostname]
+      if url[base_hostname]
+        # if the hostnames match it means the upload is in the same
+        # bucket on s3. however, the bucket folder path may differ in
+        # some cases, and we do not want to assume the url is uploaded
+        # here. e.g. the path of the current site could be /prod and the
+        # other site could be /staging
+        if s3_bucket_folder_path.present?
+          return parsed_url.path.starts_with?("/#{s3_bucket_folder_path}")
+        else
+          return true
+        end
+        return false
+      end
 
       return false if SiteSetting.Upload.s3_cdn_url.blank?
       cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname
-      cdn_hostname.presence && url[cdn_hostname]
+      return true if cdn_hostname.presence && url[cdn_hostname]
+      false
+    end
+
+    def s3_bucket_folder_path
+      @s3_helper.s3_bucket_folder_path
     end
 
     def s3_bucket_name
diff --git a/spec/components/file_store/s3_store_spec.rb b/spec/components/file_store/s3_store_spec.rb
index 99f57c7..81e39d0 100644
--- a/spec/components/file_store/s3_store_spec.rb
+++ b/spec/components/file_store/s3_store_spec.rb
@@ -251,7 +251,7 @@ describe FileStore::S3Store do
 
       before do
         optimized_image.update!(
-          url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com#{image_path}"
+          url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{image_path}"
         )
       end
 
@@ -272,6 +272,12 @@ describe FileStore::S3Store do
           SiteSetting.s3_upload_bucket = "s3-upload-bucket/discourse-uploads"
         end
 
+        before do
+          optimized_image.update!(
+            url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/discourse-uploads/#{image_path}"
+          )
+        end
+
         it "removes the file from s3 with the right paths" do
           s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once
           s3_object = stub
diff --git a/spec/multisite/s3_store_spec.rb b/spec/multisite/s3_store_spec.rb
index 922c5f4..064c699 100644
--- a/spec/multisite/s3_store_spec.rb
+++ b/spec/multisite/s3_store_spec.rb
@@ -217,4 +217,47 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do
       end
     end
   end
+
+  describe "#has_been_uploaded?" do
+    before do
+      SiteSetting.s3_region = 'us-west-1'
+      SiteSetting.s3_upload_bucket = "s3-upload-bucket/test"
+      SiteSetting.s3_access_key_id = "s3-access-key-id"
+      SiteSetting.s3_secret_access_key = "s3-secret-access-key"
+      SiteSetting.enable_s3_uploads = true
+    end
+
+    let(:store) { FileStore::S3Store.new }
+    let(:client) { Aws::S3::Client.new(stub_responses: true) }
+    let(:resource) { Aws::S3::Resource.new(client: client) }
+    let(:s3_bucket) { resource.bucket(SiteSetting.s3_upload_bucket) }
+    let(:s3_helper) { store.s3_helper }
+
+    it "returns false for blank urls" do
+      url = ""
+      expect(store.has_been_uploaded?(url)).to eq(false)
+    end
+
+    it "returns true if the base hostname is the same for both urls" do
+      url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
+      expect(store.has_been_uploaded?(url)).to eq(true)
+    end
+
+    it "returns false if the base hostname is the same for both urls BUT the bucket name is different in the path" do
+      bucket = "someotherbucket"
+      url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{bucket}/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
+      expect(store.has_been_uploaded?(url)).to eq(false)
+    end
+
+    it "returns false if the hostnames do not match and the s3_cdn_url is blank" do
+      url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
+      expect(store.has_been_uploaded?(url)).to eq(false)
+    end
+
+    it "returns true if the s3_cdn_url is present and matches the url hostname" do
+      SiteSetting.s3_cdn_url = "https://www.someotherhostname.com"
+      url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
+      expect(store.has_been_uploaded?(url)).to eq(true)
+    end
+  end
 end

GitHub sha: 72f13919

1 Like

This commit appears in #9810 which was approved by eviltrout. It was merged by martin.