Improvements to vBulletin bulk import script

Improvements to vBulletin bulk import script

  • import attachments
  • import avatars
  • import user signatures
  • create permalink file
  • reconnect to MySQL db in case of failure
diff --git a/script/bulk_import/vbulletin.rb b/script/bulk_import/vbulletin.rb
index 7361edb..a169900 100644
--- a/script/bulk_import/vbulletin.rb
+++ b/script/bulk_import/vbulletin.rb
@@ -7,6 +7,8 @@ class BulkImport::VBulletin < BulkImport::Base
 
   TABLE_PREFIX = "vb_"
   SUSPENDED_TILL ||= Date.new(3000, 1, 1)
+  ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments'
+  AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
 
   def initialize
     super
@@ -25,7 +27,8 @@ class BulkImport::VBulletin < BulkImport::Base
       username: username,
       password: password,
       database: database,
-      encoding: charset
+      encoding: charset,
+      reconnect: true
     )
 
     @client.query_options.merge!(as: :array, cache_rows: false)
@@ -41,6 +44,15 @@ class BulkImport::VBulletin < BulkImport::Base
   end
 
   def execute
+    # enable as per requirement:
+    # SiteSetting.automatic_backups_enabled = false
+    # SiteSetting.disable_emails = "non-staff"
+    # SiteSetting.authorized_extensions = '*'
+    # SiteSetting.max_image_size_kb = 102400
+    # SiteSetting.max_attachment_size_kb = 102400
+    # SiteSetting.clean_up_uploads = false
+    # SiteSetting.clean_orphan_uploads_grace_period_hours = 43200
+
     import_groups
     import_users
     import_group_users
@@ -61,6 +73,11 @@ class BulkImport::VBulletin < BulkImport::Base
     import_private_topics
     import_topic_allowed_users
     import_private_posts
+
+    create_permalink_file
+    import_attachments
+    import_avatars
+    import_signatures
   end
 
   def import_groups
@@ -462,6 +479,201 @@ class BulkImport::VBulletin < BulkImport::Base
     end
   end
 
+  def create_permalink_file
+    puts '', 'Creating Permalink File...', ''
+
+    id_mapping = []
+
+    Topic.listable_topics.find_each do |topic|
+      pcf = topic.first_post.custom_fields
+      if pcf && pcf["import_id"]
+        id = pcf["import_id"].split('-').last
+        id_mapping.push("XXX#{id}  YYY#{topic.id}")
+      end
+    end
+
+    # Category.find_each do |cat|
+    #   ccf = cat.custom_fields
+    #   if ccf && ccf["import_id"]
+    #     id = ccf["import_id"].to_i
+    #     id_mapping.push("/forumdisplay.php?#{id}  http://forum.quartertothree.com#{cat.url}")
+    #   end
+    # end
+
+    CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
+      id_mapping.each do |value|
+        csv << [value]
+      end
+    end
+  end
+
+  # find the uploaded file information from the db
+  def find_upload(post, attachment_id)
+    sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename,
+                  a.filedata filedata, a.extension extension
+             FROM #{TABLE_PREFIX}attachment a
+            WHERE a.attachmentid = #{attachment_id}"
+    results = mysql_query(sql)
+
+    unless row = results.first
+      puts "Couldn't find attachment record for attachment_id = #{attachment_id} post.id = #{post.id}"
+      return
+    end
+
+    attachment_id = row[0]
+    user_id = row[1]
+    db_filename = row[2]
+
+    filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
+    real_filename = db_filename
+    real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
+
+    unless File.exists?(filename)
+      puts "Attachment file #{row.inspect} doesn't exist"
+      return nil
+    end
+
+    upload = create_upload(post.user.id, filename, real_filename)
+
+    if upload.nil? || !upload.valid?
+      puts "Upload not valid :("
+      puts upload.errors.inspect if upload
+      return
+    end
+
+    [upload, real_filename]
+  rescue Mysql2::Error => e
+    puts "SQL Error"
+    puts e.message
+    puts sql
+  end
+
+  def import_attachments
+    puts '', 'importing attachments...'
+
+    RateLimiter.disable
+    current_count = 0
+
+    total_count = mysql_query(<<-SQL
+      SELECT COUNT(p.postid) count
+        FROM #{TABLE_PREFIX}post p
+        JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
+       WHERE t.firstpostid <> p.postid
+    SQL
+    ).first[0].to_i
+
+    success_count = 0
+    fail_count = 0
+
+    attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
+
+    Post.find_each do |post|
+      current_count += 1
+      print_status current_count, total_count
+
+      new_raw = post.raw.dup
+      new_raw.gsub!(attachment_regex) do |s|
+        matches = attachment_regex.match(s)
+        attachment_id = matches[1]
+
+        upload, filename = find_upload(post, attachment_id)
+        unless upload
+          fail_count += 1
+          next
+          # should we strip invalid attach tags?
+        end
+
+        html_for_upload(upload, filename)
+      end
+
+      if new_raw != post.raw
+        PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin')
+      end
+
+      success_count += 1
+    end
+
+    puts "", "imported #{success_count} attachments... failed: #{fail_count}."
+    RateLimiter.enable
+  end
+
+  def import_avatars
+    if AVATAR_DIR && File.exists?(AVATAR_DIR)
+      puts "", "importing user avatars"
+
+      RateLimiter.disable
+      start = Time.now
+      count = 0
+
+      Dir.foreach(AVATAR_DIR) do |item|
+        print "\r%7d - %6d/sec".freeze % [count, count.to_f / (Time.now - start)]
+
+        next if item == ('.') || item == ('..') || item == ('.DS_Store')
+        next unless item =~ /avatar(\d+)_(\d).gif/
+        scan = item.scan(/avatar(\d+)_(\d).gif/)
+        next unless scan[0][0].present?
+        u = UserCustomField.find_by(name: "import_id", value: scan[0][0]).try(:user)
+        next unless u.present?
+        # raise "User not found for id #{user_id}" if user.blank?
+
+        photo_real_filename = File.join(AVATAR_DIR, item)
+        puts "#{photo_real_filename} not found" unless File.exists?(photo_real_filename)
+
+        upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
+        count += 1
+        if upload.persisted?
+          u.import_mode = false
+          u.create_user_avatar
+          u.import_mode = true
+          u.user_avatar.update(custom_upload_id: upload.id)
+          u.update(uploaded_avatar_id: upload.id)
+        else
+          puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
+        end
+      end
+
+      puts "", "imported #{count} avatars..."
+      RateLimiter.enable
+    end
+  end
+
+  def import_signatures
+    puts "Importing user signatures..."
+
+    total_count = mysql_query(<<-SQL
+      SELECT COUNT(userid) count
+        FROM #{TABLE_PREFIX}sigparsed
+    SQL
+    ).first[0].to_i
+    current_count = 0
+
+    user_signatures = mysql_stream <<-SQL
+        SELECT userid, signatureparsed
+          FROM #{TABLE_PREFIX}sigparsed
+      ORDER BY userid
+    SQL
+
+    user_signatures.each do |sig|
+      current_count += 1
+      print_status current_count, total_count
+      user_id = sig[0]
+      user_sig = sig[1]
+      next unless user_id.present? && user_sig.present?
+
+      u = UserCustomField.find_by(name: "import_id", value: user_id).try(:user)
+      next unless u.present?
+
+      # can not hold dupes
+      UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all
+
+      user_sig.gsub!(/\[\/?sigpic\]/i, "")
+
+      UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true)
+      UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig)
+      UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false))
+    end
+  end
+
   def extract_pm_title(title)
     normalize_text(title).scrub.gsub(/^Re\s*:\s*/i, "")
   end
@@ -473,6 +685,17 @@ class BulkImport::VBulletin < BulkImport::Base
     date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
   end
 

[... diff too long, it was truncated ...]

GitHub sha: 110512d4

1 Like