create answerhub importer (#7671)

diff --git a/script/import_scripts/answerhub.rb b/script/import_scripts/answerhub.rb
new file mode 100644
index 0000000..a818777
--- /dev/null
+++ b/script/import_scripts/answerhub.rb
@@ -0,0 +1,444 @@
+# frozen_string_literal: true
+
+# AnswerHub Importer
+#
+# This importer works from a MySQL dump of the AnswerHub database.
+# Set the ENV variables listed below before running the script.
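+#
+# A minimal example invocation (the values and paths below are illustrative
+# placeholders, not requirements of this script):
+#
+#   DB_NAME=answerhub DB_USER=answerhub DB_PASS=answerhub \
+#   TABLE_PREFIX=network1 BATCH_SIZE=1000 \
+#   PROCESS_UPLOADS=1 ATTACHMENT_DIR=/path/to/attachments \
+#   ANSWERHUB_DOMAIN=qa.example.com \
+#   bundle exec ruby script/import_scripts/answerhub.rb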
+
+require_relative 'base'
+require 'mysql2'
+require 'open-uri'
+
+class ImportScripts::AnswerHub < ImportScripts::Base
+
+  DB_NAME ||= ENV['DB_NAME'] || "answerhub"
+  DB_PASS ||= ENV['DB_PASS'] || "answerhub"
+  DB_USER ||= ENV['DB_USER'] || "answerhub"
+  TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "network1"
+  BATCH_SIZE ||= (ENV['BATCH_SIZE'] || 1000).to_i
+  ATTACHMENT_DIR = ENV['ATTACHMENT_DIR'] || ''
+  PROCESS_UPLOADS = ENV['PROCESS_UPLOADS'].to_i
+  ANSWERHUB_DOMAIN = ENV['ANSWERHUB_DOMAIN']
+  AVATAR_DIR = ENV['AVATAR_DIR'] || ""
+  SITE_ID = ENV['SITE_ID'].to_i
+  CATEGORY_MAP_FROM = ENV['CATEGORY_MAP_FROM'].to_i
+  CATEGORY_MAP_TO = ENV['CATEGORY_MAP_TO'].to_i
+  SCRAPE_AVATARS = ENV['SCRAPE_AVATARS'].to_i
+
+  def initialize
+    super
+    @client = Mysql2::Client.new(
+      host: "localhost",
+      username: DB_USER,
+      password: DB_PASS,
+      database: DB_NAME
+    )
+    @skip_updates = true
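+    # Imported topics get their AnswerHub topic names applied as tags in
+    # import_topics, so tagging is force-enabled for the import.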
+    SiteSetting.tagging_enabled = true
+    SiteSetting.max_tags_per_topic = 10
+  end
+
+  def execute
+    puts "Now starting the AnswerHub Import"
+    puts "DB Name: #{DB_NAME}"
+    puts "Table Prefix: #{TABLE_PREFIX}"
+    puts
+    import_users
+    import_categories
+    import_topics
+    import_posts
+    import_groups
+    add_users_to_groups
+    add_moderators
+    add_admins
+    import_avatars
+  end
+
+  def import_users
+    puts '', "creating users"
+
+    query =
+      "SELECT count(*) count
+       FROM #{TABLE_PREFIX}_authoritables
+       WHERE c_type = 'user'
+       AND c_active = 1
+       AND c_system <> 1;"
+    total_count = @client.query(query).first['count']
+    puts "Total count: #{total_count}"
+    @last_user_id = -1
+
+    batches(BATCH_SIZE) do |offset|
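+      # Pages through users by the last seen c_id; the block's offset is only
+      # used for progress reporting in create_users.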
+      query = "SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description
+      FROM #{TABLE_PREFIX}_authoritables
+      WHERE c_type='user'
+      AND c_active = 1
+      AND c_system <> 1
+      AND c_id > #{@last_user_id}
+      LIMIT #{BATCH_SIZE};"
+
+      results = @client.query(query)
+      break if results.size < 1
+      @last_user_id = results.to_a.last['c_id']
+
+      create_users(results, total: total_count, offset: offset) do |user|
+        puts "#{user['c_id']} #{user['c_name']}"
+        next if @lookup.user_id_from_imported_user_id(user['c_id'])
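+        # c_primaryEmail is selected above but not used here; every imported
+        # user gets a random placeholder address instead.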
+        { id: user['c_id'],
+          email: "#{SecureRandom.hex}@invalid.invalid",
+          username: user['c_name'],
+          created_at: user['c_creation_date'],
+          bio_raw: user['c_description'],
+          last_seen_at: user['c_last_seen'],
+        }
+      end
+    end
+  end
+
+  def import_categories
+    puts "", "importing categories..."
+
+    # Import parent categories first
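+    # NOTE: container id 7 appears to be the root space in the source dump;
+    # other AnswerHub installs may need a different id here.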
+    query = "SELECT c_id, c_name, c_plug, c_parent
+    FROM containers
+    WHERE c_type = 'space'
+    AND c_active = 1
+    AND (c_parent = 7 OR c_parent IS NULL)"
+    results = @client.query(query)
+
+    create_categories(results) do |c|
+      {
+        id: c['c_id'],
+        name: c['c_name'],
+        parent_category_id: check_parent_id(c['c_parent']),
+      }
+    end
+
+    # Import sub-categories
+    query = "SELECT c_id, c_name, c_plug, c_parent
+    FROM containers
+    WHERE c_type = 'space'
+    AND c_active = 1
+    AND c_parent != 7 AND c_parent IS NOT NULL"
+    results = @client.query(query)
+
+    create_categories(results) do |c|
+      puts c.inspect
+      {
+        id: c['c_id'],
+        name: c['c_name'],
+        parent_category_id: category_id_from_imported_category_id(check_parent_id(c['c_parent'])),
+      }
+    end
+  end
+
+  def import_topics
+    puts "", "importing topics..."
+
+    count_query =
+      "SELECT count(*) count
+       FROM #{TABLE_PREFIX}_nodes
+       WHERE c_visibility <> 'deleted'
+       AND (c_type = 'question'
+         OR c_type = 'kbentry');"
+    total_count = @client.query(count_query).first['count']
+
+    @last_topic_id = -1
+
+    batches(BATCH_SIZE) do |offset|
+      # Questions and knowledge base entries become topics
+      query =
+        "SELECT *
+         FROM #{TABLE_PREFIX}_nodes
+         WHERE c_id > #{@last_topic_id}
+         AND c_visibility <> 'deleted'
+         AND (c_type = 'question'
+           OR c_type = 'kbentry')
+         ORDER BY c_id ASC
+         LIMIT #{BATCH_SIZE};"
+      topics = @client.query(query)
+
+      break if topics.size < 1
+      @last_topic_id = topics.to_a.last['c_id']
+
+      create_posts(topics, total: total_count, offset: offset) do |t|
+        user_id = user_id_from_imported_user_id(t['c_author']) || Discourse::SYSTEM_USER_ID
+        if PROCESS_UPLOADS == 1
+          body = process_uploads(t['c_body'], user_id)
+        else
+          body = t['c_body']
+        end
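+        # AnswerHub stores post bodies as HTML, so convert them to Markdown.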
+        markdown_body = HtmlToMarkdown.new(body).to_markdown
+        {
+          id: t['c_id'],
+          user_id: user_id,
+          title: t['c_title'],
+          category: category_id_from_imported_category_id(t['c_primaryContainer']),
+          raw: markdown_body,
+          created_at: t['c_creation_date'],
+          post_create_action: proc do |post|
+            tag_names = t['c_topic_names'].split(',')
+            DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
+          end
+        }
+      end
+    end
+  end
+
+  def import_posts
+    puts "", "importing posts..."
+
+    count_query =
+      "SELECT count(*) count
+       FROM #{TABLE_PREFIX}_nodes
+       WHERE c_visibility <> 'deleted'
+       AND (c_type = 'answer'
+         OR c_type = 'comment'
+         OR c_type = 'kbentry');"
+    total_count = @client.query(count_query).first['count']
+
+    @last_post_id = -1
+
+    batches(BATCH_SIZE) do |offset|
+      query =
+        "SELECT *
+         FROM #{TABLE_PREFIX}_nodes
+         WHERE c_id > #{@last_post_id}
+         AND c_visibility <> 'deleted'
+         AND (c_type = 'answer'
+           OR c_type = 'comment'
+           OR c_type = 'kbentry')
+         ORDER BY c_id ASC
+         LIMIT #{BATCH_SIZE};"
+      posts = @client.query(query)
+
+      break if posts.size < 1
+      # Advance the id cursor before skipping batches that are already
+      # imported, otherwise a fully imported batch would be re-fetched forever.
+      @last_post_id = posts.to_a.last['c_id']
+      next if all_records_exist? :posts, posts.map { |p| p['c_id'] }
+
+      create_posts(posts, total: total_count, offset: offset) do |p|
+        t = topic_lookup_from_imported_post_id(p['c_parent'])
+        next unless t
+        user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID
+        if PROCESS_UPLOADS == 1
+          body = process_uploads(p['c_body'], user_id)
+        else
+          body = p['c_body']
+        end
+        markdown_body = HtmlToMarkdown.new(body).to_markdown
+        {
+          id: p['c_id'],
+          user_id: user_id,
+          topic_id: t[:topic_id],
+          reply_to_post_number: t[:post_number],
+          raw: markdown_body,
+          created_at: p['c_creation_date'],
+          post_create_action: proc do |post_info|
+            begin
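+              # Accepted answers in AnswerHub (c_marked = 1) are turned into
+              # Discourse solutions; this relies on the discourse-solved plugin.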
+              if p['c_type'] == 'answer' && p['c_marked'] == 1
+                post = Post.find(post_info[:id])
+                if post
+                  user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID
+                  current_user = User.find(user_id)
+                  solved = DiscourseSolved.accept_answer!(post, current_user)
+                  puts "SOLVED: #{solved}"
+                end
+              end
+            rescue ActiveRecord::RecordInvalid
+              puts "SOLVED: Skipped post_id: #{post.id} because invalid"
+            end
+          end
+        }
+      end
+    end
+  end
+

[... diff too long, it was truncated ...]

GitHub sha: 0955d9ec
