DEV: Add vBulletin5 bulk importer (#12904)

DEV: Add vBulletin5 bulk importer (#12904)

This is a pretty straightforward bulk importer, just tailored to the vBulletin 5 database structure.

Also made a few minor improvements to the base importer – they should be self-explanatory in the code.

diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb
index 261110a..a04c7de 100644
--- a/script/bulk_import/base.rb
+++ b/script/bulk_import/base.rb
@@ -627,6 +627,10 @@ class BulkImport::Base
     raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, "\\1")
     raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, "\\1")
 
+    # [STRIKE]
+    raw.gsub!(/\[STRIKE\]/i, "<s>")
+    raw.gsub!(/\[\/STRIKE\]/i, "</s>")
+
     # [QUOTE]...[/QUOTE]
     raw.gsub!(/\[QUOTE="([^\]]+)"\]/i) { "[QUOTE=#{$1}]" }
 
@@ -644,7 +648,7 @@ class BulkImport::Base
 
       username = @mapped_usernames[imported_username] || imported_username
       post_number = post_number_from_imported_id(imported_postid)
-      topic_id = topic_id_from_imported_post_id(imported_post_id)
+      topic_id = topic_id_from_imported_post_id(imported_postid)
 
       if post_number && topic_id
         "\n[quote=\"#{username}, post:#{post_number}, topic:#{topic_id}\"]\n"
@@ -668,9 +672,9 @@ class BulkImport::Base
     # (basically, we're only missing list=a here...)
     # (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
     raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]')
-    raw.gsub!(/\[list=1\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
+    raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
     raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]')
-    raw.gsub!(/\[list=1\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
+    raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
     # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
     raw.gsub!(/\[\*\]\n/, '')
     raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
@@ -749,6 +753,7 @@ class BulkImport::Base
     name.gsub!(/[^A-Za-z0-9]+$/, "")
     name.gsub!(/([-_.]{2,})/) { $1.first }
     name.strip!
+    name.truncate(60)
     name
   end
 
@@ -757,7 +762,7 @@ class BulkImport::Base
   end
 
   def random_email
-    "#{SecureRandom.hex}@ema.il"
+    "#{SecureRandom.hex}@email.invalid"
   end
 
   def pre_cook(raw)
diff --git a/script/bulk_import/vbulletin5.rb b/script/bulk_import/vbulletin5.rb
new file mode 100644
index 0000000..507eb3d
--- /dev/null
+++ b/script/bulk_import/vbulletin5.rb
@@ -0,0 +1,781 @@
+# frozen_string_literal: true
+
+require_relative "base"
+require "cgi"
+require "set"
+require "mysql2"
+require "htmlentities"
+require 'ruby-bbcode-to-md'
+require 'find'
+
+class BulkImport::VBulletin5 < BulkImport::Base
+
+  # Table-name prefix interpolated into the SQL queries below; empty means
+  # the source tables are unprefixed.
+  DB_PREFIX = ""
+  # Suspension end date given to banned users whose liftdate is not positive.
+  SUSPENDED_TILL ||= Date.new(3000, 1, 1)
+  # Directories overridable through the ATTACH_DIR / AVATAR_DIR environment
+  # variables. NOTE(review): presumably where attachment and avatar files are
+  # read from; the code using them is outside this excerpt.
+  ATTACH_DIR ||= ENV['ATTACH_DIR'] || '/shared/import/data/attachments'
+  AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
+  # NOTE(review): presumably the nodeid of the root channel; confirm against
+  # the source database, usage is outside this excerpt.
+  ROOT_NODE = 2
+
+  # Opens the connection to the source MySQL database and caches the
+  # contenttypeid values that later queries filter on.
+  #
+  # Connection settings are read from the environment:
+  #   DB_HOST     (default "localhost")
+  #   DB_USERNAME (default "root")
+  #   DB_PASSWORD (no default)
+  #   DB_NAME     (default "vbulletin")
+  #   DB_CHARSET  (default "utf8")
+  #
+  # Raises RuntimeError when one of the required contenttype rows is missing.
+  def initialize
+    super
+
+    host     = ENV["DB_HOST"] || "localhost"
+    username = ENV["DB_USERNAME"] || "root"
+    password = ENV["DB_PASSWORD"]
+    database = ENV["DB_NAME"] || "vbulletin"
+    charset  = ENV["DB_CHARSET"] || "utf8"
+
+    @html_entities = HTMLEntities.new
+    @encoding = CHARSET_MAP[charset]
+    @bbcode_to_md = true
+
+    @client = Mysql2::Client.new(
+      host: host,
+      username: username,
+      password: password,
+      database: database,
+      encoding: charset,
+      reconnect: true
+    )
+
+    # Rows are returned as plain arrays and are not cached by the client.
+    @client.query_options.merge!(as: :array, cache_rows: false)
+
+    # be aware there may be other contenttypeid's in use, such as poll, link, video, etc.
+    @forum_typeid = content_type_id("Forum")
+    @channel_typeid = content_type_id("Channel")
+    @text_typeid = content_type_id("Text")
+  end
+
+  # Returns the contenttypeid of the contenttype row whose class equals
+  # +class_name+. Only one row is requested (LIMIT 1).
+  #
+  # Raises RuntimeError if no such row exists, instead of failing later with
+  # a NoMethodError on nil.
+  def content_type_id(class_name)
+    sql = "SELECT contenttypeid FROM #{DB_PREFIX}contenttype " \
+          "WHERE class='#{@client.escape(class_name)}' LIMIT 1"
+    row = mysql_query(sql).to_a.first
+    raise "No '#{class_name}' row found in #{DB_PREFIX}contenttype" unless row
+
+    row[0]
+  end
+  private :content_type_id
+
+  # Runs the complete import by invoking every step below, one after the
+  # other, in exactly the order listed.
+  def execute
+    # enable as per requirement:
+    #SiteSetting.automatic_backups_enabled = false
+    #SiteSetting.disable_emails = "non-staff"
+    #SiteSetting.authorized_extensions = '*'
+    #SiteSetting.max_image_size_kb = 102400
+    #SiteSetting.max_attachment_size_kb = 102400
+    #SiteSetting.clean_up_uploads = false
+    #SiteSetting.clean_orphan_uploads_grace_period_hours = 43200
+    #SiteSetting.max_category_nesting = 3
+
+    steps = %i[
+      import_groups
+      import_users
+      import_group_users
+
+      import_user_emails
+      import_user_stats
+      import_user_profiles
+      import_user_account_id
+
+      import_categories
+      import_topics
+      import_topic_first_posts
+      import_replies
+
+      import_likes
+
+      import_private_topics
+      import_topic_allowed_users
+      import_private_first_posts
+      import_private_replies
+
+      create_oauth_records
+      create_permalinks
+      import_attachments
+    ]
+
+    # `send` keeps the implicit-receiver semantics of a bare method call.
+    steps.each { |step| send(step) }
+  end
+
+  # Streams the usergroup rows whose id is above @last_imported_group_id and
+  # hands them to create_groups, mapping each row to a group attribute hash.
+  def import_groups
+    puts "Importing groups..."
+
+    sql = <<-SQL
+        SELECT usergroupid, title, description, usertitle
+          FROM #{DB_PREFIX}usergroup
+         WHERE usergroupid > #{@last_imported_group_id}
+      ORDER BY usergroupid
+    SQL
+
+    create_groups(mysql_stream(sql)) do |row|
+      # Column order follows the SELECT list above.
+      group_id, title, description, user_title = row
+
+      {
+        imported_id: group_id,
+        name: normalize_text(title),
+        bio_raw: normalize_text(description),
+        title: normalize_text(user_title),
+      }
+    end
+  end
+
+  # Streams the user rows above @last_imported_user_id (left-joined with
+  # userban) and hands them to create_users, mapping each row to a user
+  # attribute hash.
+  def import_users
+    puts "Importing users..."
+
+    sql = <<-SQL
+        SELECT u.userid, u.username, u.joindate, u.birthday,
+               u.ipaddress, u.usergroupid, ub.bandate, ub.liftdate, u.email
+          FROM #{DB_PREFIX}user u
+     LEFT JOIN #{DB_PREFIX}userban ub ON ub.userid = u.userid
+         WHERE u.userid > #{@last_imported_user_id}
+      ORDER BY u.userid
+    SQL
+
+    create_users(mysql_stream(sql)) do |row|
+      # Column order follows the SELECT list above.
+      user_id, username, join_date, birthday, ip_address,
+        group_id, ban_date, lift_date, email = row
+
+      attrs = {
+        imported_id: user_id,
+        # NOTE(review): String#truncate appends "..." by default and runs
+        # before normalize_text -- confirm that is the intended username form.
+        username: normalize_text(username.truncate(60)),
+        name: normalize_text(username),
+        email: email,
+        created_at: Time.zone.at(join_date),
+        date_of_birth: parse_birthday(birthday),
+        primary_group_id: group_id_from_imported_id(group_id),
+        # NOTE(review): 6 and 7 are presumably the stock administrator and
+        # moderator usergroup ids -- verify against the source forum.
+        admin: group_id == 6,
+        moderator: group_id == 7
+      }
+
+      # Keep only the first dotted-quad found in the stored address.
+      if ip_address.present?
+        attrs[:ip_address] = ip_address[/\b(?:\d{1,3}\.){3}\d{1,3}\b/]
+      end
+
+      # A non-NULL liftdate means the LEFT JOIN found a userban row.
+      if lift_date
+        attrs[:suspended_at] = Time.zone.at(ban_date)
+        attrs[:suspended_till] =
+          if lift_date > 0
+            Time.zone.at(lift_date)
+          else
+            SUSPENDED_TILL
+          end
+      end
+
+      attrs
+    end
+  end
+
+  # Streams the user rows above @last_imported_user_id and hands them to
+  # create_user_emails, producing one email record per user.
+  def import_user_emails
+    puts "Importing user emails..."
+
+    sql = <<-SQL
+        SELECT u.userid, u.email, u.joindate
+          FROM #{DB_PREFIX}user u
+         WHERE u.userid > #{@last_imported_user_id}
+      ORDER BY u.userid
+    SQL
+
+    create_user_emails(mysql_stream(sql)) do |row|
+      # NOTE(review): u.email is selected but never used; every record gets a
+      # generated address from random_email. Confirm this is intentional.
+      user_id, _source_email, join_date = row
+
+      {
+        imported_id: user_id,
+        imported_user_id: user_id,
+        email: random_email,
+        created_at: Time.zone.at(join_date)
+      }
+    end
+  end
+
+  # Streams per-user statistics for users above @last_imported_user_id and
+  # hands them to create_user_stats. The "threads" column counts the user's
+  # nodes of the Text content type whose parent node is of the Channel
+  # content type.
+  def import_user_stats
+    puts "Importing user stats..."
+
+    sql = <<-SQL
+      SELECT u.userid, u.joindate, u.posts,
+             SUM(
+               CASE
+                 WHEN n.contenttypeid = #{@text_typeid}
+                  AND n.parentid IN ( select nodeid from #{DB_PREFIX}node where contenttypeid=#{@channel_typeid} )
+                 THEN 1
+                 ELSE 0
+               END
+             ) AS threads
+        FROM #{DB_PREFIX}user u
+        LEFT OUTER JOIN #{DB_PREFIX}node n ON u.userid = n.userid
+       WHERE u.userid > #{@last_imported_user_id}
+       GROUP BY u.userid
+       ORDER BY u.userid
+    SQL
+
+    create_user_stats(mysql_stream(sql)) do |row|
+      # Column order follows the SELECT list above.
+      user_id, join_date, post_total, thread_total = row
+
+      {
+        imported_id: user_id,
+        imported_user_id: user_id,
+        new_since: Time.zone.at(join_date),
+        post_count: post_total,
+        topic_count: thread_total,
+      }
+    end
+  end
+
+  def import_group_users
+    puts "Importing group users..."
+
+    # import primary groups
+
+    group_users = mysql_stream <<-SQL
+      SELECT usergroupid, userid
+        FROM #{DB_PREFIX}user
+       WHERE userid > #{@last_imported_user_id}
+    SQL
+

[... diff too long, it was truncated ...]

GitHub sha: c1517e42

This commit appears in #12904 which was approved by ZogStriP. It was merged by justindirose.