DEV: Add Higher Logic import script (#12623)

DEV: Add Higher Logic import script (#12623)

Wrote up a new script to import from Higher Logic. Nothing too crazy going on here. Two major things about this script:

It requires you to convert a Microsoft SQL file to a format MySQL can read.
Higher Logic stores posts (at least in the case of the import I ran) with the email thread shown in the post body. The script does its best to truncate this out, but the logic may need to be improved on future imports. For the import I ran, it worked just fine as is. 🤷‍♂️
diff --git a/script/import_scripts/higher_logic.rb b/script/import_scripts/higher_logic.rb
new file mode 100644
index 0000000..1f5b1d9
--- /dev/null
+++ b/script/import_scripts/higher_logic.rb
@@ -0,0 +1,231 @@
+# frozen_string_literal: true
+
+require "mysql2"
+require File.expand_path(File.dirname(__FILE__) + "/base.rb")
+
+class ImportScripts::HigherLogic < ImportScripts::Base
+
+  HIGHERLOGIC_DB = "higherlogic"
+  BATCH_SIZE = 1000
+  ATTACHMENT_DIR = "/shared/import/data/attachments"
+
+  def initialize
+    super
+
+    @client = Mysql2::Client.new(
+      host: "localhost",
+      username: "root",
+      database: HIGHERLOGIC_DB
+    )
+  end
+
+  def execute
+    import_groups
+    import_users
+    import_group_users
+    import_categories
+    import_posts
+    import_attachments
+  end
+
+  def import_groups
+    puts '', 'importing groups'
+
+    groups = mysql_query <<-SQL
+        SELECT CommunityKey, CommunityName
+          FROM Community
+      ORDER BY CommunityName
+    SQL
+
+    create_groups(groups) do |group|
+      {
+        id: group['CommunityKey'],
+        name: group['CommunityName']
+      }
+    end
+  end
+
+  def import_users
+    puts '', 'importing users'
+    total_count = mysql_query("SELECT count(*) FROM Contact").first["count"]
+
+    batches(BATCH_SIZE) do |offset|
+      results = mysql_query <<-SQL
+        SELECT ContactKey, FirstName, LastName, EmailAddress, HLAdminFlag, UserStatus, CreatedOn, Birthday, Bio
+          FROM Contact
+         LIMIT #{BATCH_SIZE}
+        OFFSET #{offset}
+      SQL
+
+      break if results.size < 1
+
+      next if all_records_exist? :users, results.map { |u| u['ContactKey'] }
+
+      create_users(results, total: total_count, offset: offset) do |user|
+        next if user['EmailAddress'].blank?
+        {
+          id: user['ContactKey'],
+          email: user['EmailAddress'],
+          name: "#{user['FirstName']} #{user['LastName']}",
+          created_at: user['CreatedOn'] == nil ? 0 : Time.zone.at(user['CreatedOn']),
+          bio_raw: user['Bio'],
+          active: user['UserStatus'] == "Active",
+          admin: user['HLAdminFlag'] == 1
+        }
+      end
+    end
+  end
+
+  def import_group_users
+    puts '', 'importing group users'
+
+    group_users = mysql_query(<<-SQL
+      SELECT CommunityKey, ContactKey
+        FROM CommunityMember
+    SQL
+    ).to_a
+
+    group_users.each do |row|
+      next unless user_id = user_id_from_imported_user_id(row['ContactKey'])
+      next unless group_id = group_id_from_imported_group_id(row['CommunityKey'])
+      puts '', '.'
+
+      GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
+    end
+  end
+
+  def import_categories
+    puts '', 'importing categories'
+
+    categories = mysql_query <<-SQL
+      SELECT DiscussionKey, DiscussionName
+        FROM Discussion
+    SQL
+
+    create_categories(categories) do |category|
+      {
+        id: category['DiscussionKey'],
+        name: category['DiscussionName']
+      }
+    end
+  end
+
+  def import_posts
+    puts '', 'importing topics and posts'
+    total_count = mysql_query("SELECT count(*) FROM DiscussionPost").first["count"]
+
+    batches(BATCH_SIZE) do |offset|
+      results = mysql_query <<-SQL
+          SELECT MessageKey,
+                 ParentMessageKey,
+                 Subject,
+                 ContactKey,
+                 DiscussionKey,
+                 PinnedFlag,
+                 Body,
+                 CreatedOn
+            FROM DiscussionPost
+           WHERE CreatedOn > '2020-01-01 00:00:00'
+           LIMIT #{BATCH_SIZE}
+          OFFSET #{offset}
+      SQL
+
+      break if results.size < 1
+      next if all_records_exist? :posts, results.map { |p| p['MessageKey'] }
+
+      create_posts(results, total: total_count, offset: offset) do |post|
+        raw = preprocess_raw(post['Body'])
+        mapped = {
+          id: post['MessageKey'],
+          user_id: user_id_from_imported_user_id(post['ContactKey']),
+          raw: raw,
+          created_at: Time.zone.at(post['CreatedOn']),
+        }
+
+        if post['ParentMessageKey'].nil?
+          mapped[:category] = category_id_from_imported_category_id(post['DiscussionKey']).to_i
+          mapped[:title] = CGI.unescapeHTML(post['Subject'])
+          mapped[:pinned] = post['PinnedFlag'] == 1
+        else
+          topic = topic_lookup_from_imported_post_id(post['ParentMessageKey'])
+
+          if topic.present?
+            mapped[:topic_id] = topic[:topic_id]
+          else
+            puts "Parent post #{post['ParentMessageKey']} doesn't exist. Skipping."
+            next
+          end
+        end
+
+        mapped
+      end
+    end
+  end
+
+  def import_attachments
+    puts '', 'importing attachments'
+
+    count = 0
+
+    total_attachments = mysql_query(<<-SQL
+      SELECT COUNT(*) count
+        FROM LibraryEntryFile l
+        JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
+       WHERE p.CreatedOn > '2020-01-01 00:00:00'
+    SQL
+    ).first['count']
+
+    batches(BATCH_SIZE) do |offset|
+      attachments = mysql_query(<<-SQL
+           SELECT l.VersionName,
+                  l.FileExtension,
+                  p.MessageKey
+             FROM LibraryEntryFile l
+        LEFT JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
+            WHERE p.CreatedOn > '2020-01-01 00:00:00'
+            LIMIT #{BATCH_SIZE}
+           OFFSET #{offset}
+      SQL
+      ).to_a
+
+      break if attachments.empty?
+
+      attachments.each do |a|
+        print_status(count += 1, total_attachments, get_start_time("attachments"))
+        original_filename = "#{a['VersionName']}.#{a['FileExtension']}"
+        path = File.join(ATTACHMENT_DIR, original_filename)
+
+        if File.exists?(path)
+          if post = Post.find(post_id_from_imported_post_id(a['MessageKey']))
+            filename = File.basename(original_filename)
+            upload = create_upload(post.user.id, path, filename)
+
+            if upload&.persisted?
+              html = html_for_upload(upload, filename)
+
+              post.raw << "\n\n" << html
+              post.save!
+              PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
+            end
+          end
+        end
+      end
+    end
+  end
+
+  def preprocess_raw(body)
+    raw = body.dup
+
+    # trim off any post text beyond ---- to remove email threading
+    raw = raw.slice(0..(raw.index('------'))) || raw
+
+    raw = HtmlToMarkdown.new(raw).to_markdown
+    raw
+  end
+
+  def mysql_query(sql)
+    @client.query(sql, cache_rows: false)
+  end
+end
+
+ImportScripts::HigherLogic.new.perform

GitHub sha: e302e32a

1 Like

This commit appears in #12623 which was approved by eviltrout. It was merged by justindirose.