DEV: minor improvements in the vanilla import script. (#14026)

DEV: minor improvements in the vanilla import script. (#14026)

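The post raw is now parsed according to each record's `Format` value: `html` bodies go through `process_raw`, `rich` bodies go through `VanillaBodyParser`, and `markdown` bodies go through `process_raw` without the ReverseMarkdown conversion. Records with any other (or missing) format fall back to the previous `@vb_parser`-based behavior.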

diff --git a/script/import_scripts/vanilla_mysql.rb b/script/import_scripts/vanilla_mysql.rb
index 459ba65..da20a58 100644
--- a/script/import_scripts/vanilla_mysql.rb
+++ b/script/import_scripts/vanilla_mysql.rb
@@ -336,7 +336,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
           user_id: user_id,
           title: discussion['Name'],
           category: category_id_from_imported_category_id(discussion['CategoryID']) || @category_mappings[discussion['CategoryID']].try(:[], :category_id),
-          raw: @vb_parser ? VanillaBodyParser.new(discussion, user_id).parse : process_raw(discussion['Body']),
+          raw: get_raw(discussion, user_id),
           views: discussion['CountViews'] || 0,
           closed: discussion['Closed'] == 1,
           pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']),
@@ -381,7 +381,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
           id: "comment#" + comment['CommentID'].to_s,
           user_id: user_id,
           topic_id: t[:topic_id],
-          raw: @vb_parser ? VanillaBodyParser.new(comment, user_id).parse : process_raw(comment['Body']),
+          raw: get_raw(comment, user_id),
           created_at: Time.zone.at(comment['DateInserted'])
         }
 
@@ -449,7 +449,7 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
 
       create_posts(messages, total: total_count, offset: offset) do |message|
         user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID
-        body = @vb_parser ? VanillaBodyParser.new(message, user_id).parse : process_raw(message['Body'])
+        body = get_raw(message, user_id)
 
         common = {
           user_id: user_id,
@@ -486,14 +486,30 @@ class ImportScripts::VanillaSQL < ImportScripts::Base
     end
   end
 
-  def process_raw(raw)
+  def get_raw(record, user_id)
+    format = (record['Format'] || "").downcase
+    body = record['Body']
+
+    case format
+    when "html"
+      process_raw(body)
+    when "rich"
+      VanillaBodyParser.new(record, user_id).parse
+    when "markdown"
+      process_raw(body, skip_reverse_markdown: true)
+    else
+      @vb_parser ? VanillaBodyParser.new(record, user_id).parse : process_raw(body)
+    end
+  end
+
+  def process_raw(raw, skip_reverse_markdown: false)
     return if raw == nil
     raw = @htmlentities.decode(raw)
 
     # convert user profile links to user mentions
     raw.gsub!(/<a.*>(@\S+?)<\/a>/) { $1 }
 
-    raw = ReverseMarkdown.convert(raw)
+    raw = ReverseMarkdown.convert(raw) unless skip_reverse_markdown
 
     raw.scrub!
 

GitHub sha: cd9262b7d3af8b12a9b1eac396ca78ed9aae219c

This commit appears in #14026, which was approved by CvX and merged by vinothkannans.