VBulletin5 importer improvements (#9477)

VBulletin5 importer improvements (#9477)

  • no more hard-coded contenttype IDs (they are now looked up from the contenttype table)
  • permalinks for topics, categories, subcategories
  • better uploads handling
  • tag support
diff --git a/script/import_scripts/vbulletin5.rb b/script/import_scripts/vbulletin5.rb
index 3848931..682cbcd 100644
--- a/script/import_scripts/vbulletin5.rb
+++ b/script/import_scripts/vbulletin5.rb
@@ -6,14 +6,19 @@ require 'htmlentities'
 
 class ImportScripts::VBulletin < ImportScripts::Base
   BATCH_SIZE = 1000
-  DBPREFIX = "vb_"
   ROOT_NODE = 2
-
-  # CHANGE THESE BEFORE RUNNING THE IMPORTER
-  DATABASE = "yourforum"
   TIMEZONE = "America/Los_Angeles"
-  ATTACHMENT_DIR = '/home/discourse/yourforum/customattachments/'
-  AVATAR_DIR = '/home/discourse/yourforum/avatars/'
+
+  # override these using environment vars
+
+  URL_PREFIX ||= ENV['URL_PREFIX'] || "forum/"
+  DB_PREFIX ||= ENV['DB_PREFIX'] || "vb_"
+  DB_HOST ||= ENV['DB_HOST'] || "localhost"
+  DB_NAME ||= ENV['DB_NAME'] || "vbulletin"
+  DB_PASS ||= ENV['DB_PASS'] || "password"
+  DB_USER ||= ENV['DB_USER'] || "username"
+  ATTACH_DIR ||= ENV['ATTACH_DIR'] || "/home/discourse/vbulletin/attach"
+  AVATAR_DIR ||= ENV['AVATAR_DIR'] || "/home/discourse/vbulletin/avatars"
 
   def initialize
     super
@@ -25,12 +30,15 @@ class ImportScripts::VBulletin < ImportScripts::Base
     @htmlentities = HTMLEntities.new
 
     @client = Mysql2::Client.new(
-      host: "localhost",
-      username: "root",
-      database: DATABASE,
-      password: "password"
+      host: DB_HOST,
+      username: DB_USER,
+      database: DB_NAME,
+      password: DB_PASS
     )
 
+    @forum_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").first['contenttypeid']
+    @channel_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").first['contenttypeid']
+    @text_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").first['contenttypeid']
   end
 
   def execute
@@ -40,8 +48,10 @@ class ImportScripts::VBulletin < ImportScripts::Base
     import_topics
     import_posts
     import_attachments
+    import_tags
     close_topics
     post_process_posts
+    create_permalinks
   end
 
   def import_groups
@@ -49,7 +59,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
 
     groups = mysql_query <<-SQL
         SELECT usergroupid, title
-          FROM #{DBPREFIX}usergroup
+          FROM #{DB_PREFIX}usergroup
       ORDER BY usergroupid
     SQL
 
@@ -64,7 +74,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
   def import_users
     puts "", "importing users"
 
-    user_count = mysql_query("SELECT COUNT(userid) count FROM #{DBPREFIX}user").first["count"]
+    user_count = mysql_query("SELECT COUNT(userid) count FROM #{DB_PREFIX}user").first["count"]
 
     batches(BATCH_SIZE) do |offset|
       users = mysql_query <<-SQL
@@ -73,8 +83,8 @@ class ImportScripts::VBulletin < ImportScripts::Base
                  WHEN u.scheme='legacy' THEN REPLACE(token, ' ', ':')
             END AS password,
             IF(ug.title = 'Administrators', 1, 0) AS admin
-            FROM #{DBPREFIX}user u
-            LEFT JOIN #{DBPREFIX}usergroup ug ON ug.usergroupid = u.usergroupid
+            FROM #{DB_PREFIX}user u
+            LEFT JOIN #{DB_PREFIX}usergroup ug ON ug.usergroupid = u.usergroupid
         ORDER BY userid
            LIMIT #{BATCH_SIZE}
           OFFSET #{offset}
@@ -101,7 +111,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
           post_create_action: proc do |u|
             @old_username_to_new_usernames[user["username"]] = u.username
             import_profile_picture(user, u)
-            import_profile_background(user, u)
+            # import_profile_background(user, u)
           end
         }
       end
@@ -111,7 +121,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
   def import_profile_picture(old_user, imported_user)
     query = mysql_query <<-SQL
         SELECT filedata, filename
-          FROM #{DBPREFIX}customavatar
+          FROM #{DB_PREFIX}customavatar
          WHERE userid = #{old_user["userid"]}
       ORDER BY dateline DESC
          LIMIT 1
@@ -148,7 +158,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
   def import_profile_background(old_user, imported_user)
     query = mysql_query <<-SQL
         SELECT filedata, filename
-          FROM #{DBPREFIX}customprofilepic
+          FROM #{DB_PREFIX}customprofilepic
          WHERE userid = #{old_user["userid"]}
       ORDER BY dateline DESC
          LIMIT 1
@@ -176,13 +186,13 @@ class ImportScripts::VBulletin < ImportScripts::Base
     puts "", "importing top level categories..."
 
     categories = mysql_query("SELECT nodeid AS forumid, title, description, displayorder, parentid
-	      FROM #{DBPREFIX}node
+	      FROM #{DB_PREFIX}node
           WHERE parentid=#{ROOT_NODE}
         UNION
           SELECT nodeid, title, description, displayorder, parentid
-          FROM #{DBPREFIX}node
-          WHERE contenttypeid = 23
-            AND parentid IN (SELECT nodeid FROM #{DBPREFIX}node WHERE parentid=#{ROOT_NODE})").to_a
+          FROM #{DB_PREFIX}node
+          WHERE contenttypeid = #{@channel_typeid}
+            AND parentid IN (SELECT nodeid FROM #{DB_PREFIX}node WHERE parentid=#{ROOT_NODE})").to_a
 
     top_level_categories = categories.select { |c| c["parentid"] == ROOT_NODE }
 
@@ -224,19 +234,26 @@ class ImportScripts::VBulletin < ImportScripts::Base
     # keep track of closed topics
     @closed_topic_ids = []
 
-    topic_count = mysql_query("select count(nodeid) cnt from #{DBPREFIX}node where parentid in (
-        select nodeid from #{DBPREFIX}node where contenttypeid=23 ) and contenttypeid=22;").first["cnt"]
+    topic_count = mysql_query("SELECT COUNT(nodeid) cnt
+        FROM #{DB_PREFIX}node
+        WHERE (unpublishdate = 0 OR unpublishdate IS NULL)
+        AND (approved = 1 AND showapproved = 1)
+        AND parentid IN (
+        SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};"
+    ).first["cnt"]
 
     batches(BATCH_SIZE) do |offset|
       topics = mysql_query <<-SQL
         SELECT t.nodeid AS threadid, t.title, t.parentid AS forumid,t.open,t.userid AS postuserid,t.publishdate AS dateline,
             nv.count views, 1 AS visible, t.sticky,
             CONVERT(CAST(rawtext AS BINARY)USING utf8) AS raw
-        FROM #{DBPREFIX}node t
-        LEFT JOIN #{DBPREFIX}nodeview nv ON nv.nodeid=t.nodeid
-        LEFT JOIN #{DBPREFIX}text txt ON txt.nodeid=t.nodeid
-        WHERE t.parentid in ( select nodeid from #{DBPREFIX}node where contenttypeid=23 )
-          AND t.contenttypeid = 22
+        FROM #{DB_PREFIX}node t
+        LEFT JOIN #{DB_PREFIX}nodeview nv ON nv.nodeid=t.nodeid
+        LEFT JOIN #{DB_PREFIX}text txt ON txt.nodeid=t.nodeid
+        WHERE t.parentid in ( select nodeid from #{DB_PREFIX}node where contenttypeid=#{@channel_typeid} )
+          AND t.contenttypeid = #{@text_typeid}
+          AND (t.unpublishdate = 0 OR t.unpublishdate IS NULL)
+          AND t.approved = 1 AND t.showapproved = 1
         ORDER BY t.nodeid
            LIMIT #{BATCH_SIZE}
           OFFSET #{offset}
@@ -277,19 +294,19 @@ class ImportScripts::VBulletin < ImportScripts::Base
     rescue
     end
 
-    post_count = mysql_query("SELECT COUNT(nodeid) cnt FROM #{DBPREFIX}node WHERE parentid NOT IN (
-        SELECT nodeid FROM #{DBPREFIX}node WHERE contenttypeid=23 ) AND contenttypeid=22;").first["cnt"]
+    post_count = mysql_query("SELECT COUNT(nodeid) cnt FROM #{DB_PREFIX}node WHERE parentid NOT IN (
+        SELECT nodeid FROM #{DB_PREFIX}node WHERE contenttypeid=#{@channel_typeid} ) AND contenttypeid=#{@text_typeid};").first["cnt"]
 
     batches(BATCH_SIZE) do |offset|
       posts = mysql_query <<-SQL
         SELECT p.nodeid AS postid, p.userid AS userid, p.parentid AS threadid,
             CONVERT(CAST(rawtext AS BINARY)USING utf8) AS raw, p.publishdate AS dateline,
             1 AS visible, p.parentid AS parentid
-        FROM #{DBPREFIX}node p

[... diff too long, it was truncated ...]

GitHub sha: 094ddb1c

This commit appears in #9477 which was approved by eviltrout. It was merged by gschlager.