Fix vBulletin importer to import unreferenced attachments (#12187)

Fix vBulletin importer to import unreferenced attachments (#12187)

diff --git a/script/import_scripts/vbulletin.rb b/script/import_scripts/vbulletin.rb
index e1c86bf..d998daa 100644
--- a/script/import_scripts/vbulletin.rb
+++ b/script/import_scripts/vbulletin.rb
@@ -570,6 +570,25 @@ EOM
   def import_attachments
     puts '', 'importing attachments...'
 
+    mapping = {}
+    attachments = mysql_query(<<-SQL
+      SELECT a.attachmentid, a.contentid as postid, p.threadid
+        FROM #{TABLE_PREFIX}attachment a, #{TABLE_PREFIX}post p
+       WHERE a.contentid = p.postid
+       AND contenttypeid = 1 AND state = 'visible'
+    SQL
+    )
+    attachments.each do |attachment|
+      post_id = post_id_from_imported_post_id(attachment['postid'])
+      post_id = post_id_from_imported_post_id("thread-#{attachment['threadid']}") unless post_id
+      if post_id.nil?
+        puts "Post for attachment #{attachment['attachmentid']} not found"
+        next
+      end
+      mapping[post_id] ||= []
+      mapping[post_id] << attachment['attachmentid'].to_i
+    end
+
     current_count = 0
 
     total_count = mysql_query(<<-SQL
@@ -594,6 +613,10 @@ EOM
         matches = attachment_regex.match(s)
         attachment_id = matches[1]
 
+        unless mapping[post.id].nil?
+          mapping[post.id].delete(attachment_id.to_i)
+        end
+
         upload, filename = find_upload(post, attachment_id)
         unless upload
           fail_count += 1
@@ -603,6 +626,40 @@ EOM
         html_for_upload(upload, filename)
       end
 
+      # make resumed imports faster
+      if new_raw == post.raw
+        unless mapping[post.id].nil? || mapping[post.id].empty?
+          imported_text = mysql_query(<<-SQL
+            SELECT p.pagetext
+              FROM #{TABLE_PREFIX}attachment a, #{TABLE_PREFIX}post p
+             WHERE a.contentid = p.postid
+             AND a.attachmentid = #{mapping[post.id][0]}
+          SQL
+          ).first["pagetext"]
+
+          imported_text.scan(attachment_regex) do |match|
+            attachment_id = match[0]
+            mapping[post.id].delete(attachment_id.to_i)
+          end
+        end
+      end
+
+      unless mapping[post.id].nil? || mapping[post.id].empty?
+        mapping[post.id].each do |attachment_id|
+          upload, filename = find_upload(post, attachment_id)
+          unless upload
+            fail_count += 1
+            next
+          end
+
+          # internal upload deduplication will make sure that we do not import attachments again
+          html = html_for_upload(upload, filename)
+          if !new_raw[html]
+            new_raw += "\n\n#{html}\n\n"
+          end
+        end
+      end
+
       if new_raw != post.raw
         PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin')
       end

GitHub sha: 5bec0e57

1 Like

This commit appears in #12187, which was approved by gschlager. It was merged by gschlager.