DEV: Class that converts MD with old attachment links to new MD.

DEV: Add a class that converts Markdown containing old-style attachment links into Markdown that uses the new short upload URLs.

diff --git a/Gemfile b/Gemfile
index d0878a8..1caacda 100644
--- a/Gemfile
+++ b/Gemfile
@@ -151,6 +151,7 @@ group :development do
   gem 'bullet', require: !!ENV['BULLET']
   gem 'better_errors'
   gem 'binding_of_caller'
+  gem 'diffy'
 
   # waiting on 2.7.5 per: https://github.com/ctran/annotate_models/pull/595
   if rails_master?
diff --git a/Gemfile.lock b/Gemfile.lock
index db8907d..0fa3580 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -90,6 +90,7 @@ GEM
     crass (1.0.4)
     debug_inspector (0.0.3)
     diff-lcs (1.3)
+    diffy (3.3.0)
     discourse-ember-source (3.8.0.1)
     discourse_image_optim (0.26.2)
       exifr (~> 1.2, >= 1.2.2)
@@ -435,6 +436,7 @@ DEPENDENCIES
   certified
   colored2
   cppjieba_rb
+  diffy
   discourse-ember-source (~> 3.8.0)
   discourse_image_optim
   email_reply_trimmer (~> 0.1)
diff --git a/app/services/inline_uploads.rb b/app/services/inline_uploads.rb
new file mode 100644
index 0000000..4e3cece
--- /dev/null
+++ b/app/services/inline_uploads.rb
@@ -0,0 +1,170 @@
+# frozen_string_literal: true
+
+require_dependency "pretty_text"
+
+# Rewrites upload references found in raw post Markdown so that they use the
+# upload's short URL. Handles HTML <a>/<img> tags, BBCode [img] tags and
+# Markdown links/images.
+class InlineUploads
+  # Path of an original upload: "<digit>X/", any number of single hex digit
+  # directories, 40 lowercase hex characters, then optional [a-z0-9.] characters.
+  UPLOAD_PATH_REGEXP = /(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*)/
+  private_constant :UPLOAD_PATH_REGEXP
+
+  # Returns a copy of `markdown` in which recognised upload links are replaced
+  # by the matching upload's short URL. The argument itself is not mutated.
+  #
+  # on_missing - optional callable; called with the link whenever
+  #              Upload.get_from_url returns nothing for it.
+  def self.process(markdown, on_missing: nil)
+    markdown = markdown.dup
+    cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown))
+    link_occurences = []
+
+    # Pass 1: walk the cooked HTML and queue one entry per node that references
+    # an upload: [href/src value, true] when the node carries such an attribute,
+    # otherwise [first matched text, false].
+    # Only <img> nodes, or nodes whose single child is a childless non-<img>
+    # node, are considered.
+    cooked_fragment.traverse do |node|
+      unless node.name == "img"
+        only_child = node.children.count == 1 ? node.children[0] : nil
+        next unless only_child && only_child.name != "img" && only_child.children.blank?
+      end
+
+      seen_link = matched_uploads(node).first
+      next unless seen_link
+
+      actual_link = node.attributes["href"]&.value || node.attributes["src"]&.value
+      link_occurences << (actual_link ? [actual_link, true] : [seen_link, false])
+    end
+
+    raw_fragment = Nokogiri::HTML::fragment(markdown)
+
+    # Pass 2: walk the raw Markdown parsed as HTML and consume the queue in the
+    # same order, rewriting `markdown` in place for every valid entry.
+    raw_fragment.traverse do |node|
+      unless node.name == "img" || node.children.count == 0
+        next unless node.children.count == 1 && node.children[0].children.blank?
+      end
+
+      matches = matched_uploads(node)
+      next if matches.blank?
+      links = extract_links(node)
+
+      matches.zip(links).each do |_match, link|
+        seen_link, is_valid = link_occurences.shift
+        next unless link && is_valid
+        next unless link.include?(seen_link)
+
+        uri =
+          begin
+            URI(link)
+          rescue URI::Error
+            nil # unparsable links are still looked up below
+          end
+
+        # When the store is not external, skip links that name another host.
+        if !Discourse.store.external?
+          next if uri&.host && uri.host != Discourse.current_hostname
+        end
+
+        upload = Upload.get_from_url(link)
+
+        if upload
+          replace_link(markdown, node, link, upload)
+        else
+          on_missing.call(link) if on_missing
+        end
+      end
+    end
+
+    markdown
+  end
+
+  # Replaces, in place, the first occurrence of `node`'s link in `markdown`
+  # with Markdown that points at `upload.short_url`.
+  #
+  # Block-form `sub!` is used so that backslashes in link text or alt text are
+  # inserted literally instead of being read as back-references.
+  def self.replace_link(markdown, node, link, upload)
+    case node.name
+    when "a"
+      attachment_postfix =
+        if node.attributes["class"]&.value&.split(" ")&.include?("attachment")
+          "|attachment"
+        else
+          ""
+        end
+
+      text = node.children.text.strip.gsub("\n", "").gsub(/ +/, " ")
+      markdown.sub!(node.to_s) { "[#{text}#{attachment_postfix}](#{upload.short_url})" }
+    when "img"
+      text = node.attributes["alt"]&.value
+      width = node.attributes["width"]&.value
+      height = node.attributes["height"]&.value
+      text = "#{text}|#{width}x#{height}" if width && height
+      markdown.sub!(node.to_s) { "![#{text}](#{upload.short_url})" }
+    else
+      # `link` is escaped so that "." or "?" in a URL cannot act as regexp
+      # metacharacters.
+      bbcode = markdown[/\[img\]\s?#{Regexp.escape(link)}\s?\[\/img\]/]
+
+      if bbcode
+        markdown.sub!(bbcode) { "![](#{upload.short_url})" }
+      elsif (md_link = markdown.match(/(!?\[([a-z0-9|]+)\]\([\w\.\/]+\))/))
+        # NOTE(review): this matches the first Markdown link in the document,
+        # not necessarily the one containing `link` - confirm this is intended.
+        markdown.sub!(md_link[0]) { "![#{md_link[2]}](#{upload.short_url})" }
+      end
+    end
+  end
+  private_class_method :replace_link
+
+  # Returns every upload URL found in the serialized `node`, grouped by pattern
+  # in the order the patterns are tried (not in document order).
+  def self.matched_uploads(node)
+    regexps = [
+      /(upload:\/\/([a-zA-Z0-9]+)[a-z0-9\.]*)/,
+      /(\/uploads\/short-url\/([a-zA-Z0-9]+)[a-z0-9\.]*)/,
+    ]
+
+    # Escaped before interpolation: host names contain "." which would otherwise
+    # match any character.
+    db = Regexp.escape(RailsMultisite::ConnectionManagement.current_db.to_s)
+    local_path = /(\/uploads\/#{db}\/original\/#{UPLOAD_PATH_REGEXP})/
+
+    if Discourse.store.external?
+      s3_urls = [SiteSetting.Upload.s3_base_url, SiteSetting.Upload.s3_cdn_url]
+      s3_urls.map! { |url| Regexp.escape(url.to_s) }
+
+      if Rails.configuration.multisite
+        s3_urls.each { |url| regexps << /(#{url}\/uploads\/#{db}\/original\/#{UPLOAD_PATH_REGEXP})/ }
+      else
+        s3_urls.each { |url| regexps << /(#{url}\/original\/#{UPLOAD_PATH_REGEXP})/ }
+        regexps << local_path
+      end
+    else
+      regexps << local_path
+    end
+
+    html = node.to_s
+    regexps.flat_map { |regexp| html.scan(regexp).map(&:first) }
+  end
+  private_class_method :matched_uploads
+
+  # Collects candidate links from `node`: its href and src attributes, then
+  # BBCode [img] URLs, then Markdown link targets found in its serialized form.
+  def self.extract_links(node)
+    html = node.to_s
+
+    [
+      node.attributes["href"]&.value,
+      node.attributes["src"]&.value,
+      # Non-greedy so that several [img] tags on one line yield several links.
+      html.scan(/\[img\]\s?(.+?)\s?\[\/img\]/),
+      html.scan(/!?\[[a-z0-9|]+\]\(([\w\.\/]+)\)/),
+    ].flatten.compact
+  end
+  private_class_method :extract_links
+end
diff --git a/lib/tasks/posts.rake b/lib/tasks/posts.rake
index 4658087..7da89fe 100644
--- a/lib/tasks/posts.rake
+++ b/lib/tasks/posts.rake
@@ -649,3 +649,47 @@ task 'posts:invalidate_broken_images' => :environment do
   puts
   puts "", "#{rebaked} posts rebaked!"
 end
+
+# Lists the posts whose raw would change when run through InlineUploads.process.
+# DRY_RUN defaults to on; set DRY_RUN=0/false/no/off to print one "." per
+# changed post instead of a coloured diff.
+# NOTE(review): nothing is written back to the posts in either mode - TODO
+# confirm whether the non-dry run is meant to save `new_raw`.
+desc "Converts full upload URLs in `Post#raw` to short upload url"
+task 'posts:inline_uploads' => :environment do
+  # Dry run stays on unless DRY_RUN is explicitly set to a "falsey" word.
+  dry_run = !%w[0 false no off].include?(ENV["DRY_RUN"].to_s.strip.downcase)
+  Diffy::Diff.default_format = :color if dry_run
+
+  scope = Post.joins(:post_uploads)
+    .distinct
+    .where("raw LIKE '%class=\"attachment%' OR raw LIKE '%<img src=\"%'")
+
+  affected_posts_count = scope.count
+  fixed_count = 0
+  not_corrected_post_ids = []
+
+  scope.find_each do |post|
+    new_raw = InlineUploads.process(post.raw)
+
+    if post.raw == new_raw
+      not_corrected_post_ids << post.id
+      next
+    end
+
+    if dry_run
+      puts "Post id #{post.id} raw changed!"
+      puts Diffy::Diff.new(post.raw, new_raw, context: 1)
+    else
+      putc "."
+    end
+
+    fixed_count += 1
+  end
+
+  puts "#{fixed_count} out of #{affected_posts_count} affected posts corrected"
+
+  if not_corrected_post_ids.any?
+    puts "Ids of posts that were not corrected: #{not_corrected_post_ids}"
+  end
+end
diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb
index 4c9f775..8c54009 100644
--- a/spec/fabricators/upload_fabricator.rb
+++ b/spec/fabricators/upload_fabricator.rb
@@ -24,12 +24,15 @@ end
 Fabricator(:upload_s3, from: :upload) do
   url do |attrs|
     sequence(:url) do |n|
-      File.join(
-        Discourse.store.absolute_base_url,
-        Discourse.store.get_path_for(
-          "original", n + 1, attrs[:sha1], ".#{attrs[:extension]}"
-        )
+      path = +Discourse.store.get_path_for(
+        "original", n + 1, attrs[:sha1], ".#{attrs[:extension]}"
       )
+
+      if Rails.configuration.multisite
+        path.prepend(File.join(Discourse.store.upload_path, "/"))
+      end
+

[... diff too long, it was truncated ...]

GitHub sha: d93e5fb0