FIX: use hijack for processing bulk invites (#7679)

follow-up
#1

FIX: use hijack for processing bulk invites (#7679)

FIX: do not store bulk invite CSV file on server

diff --git a/app/controllers/invites_controller.rb b/app/controllers/invites_controller.rb
index 17ded05..e0d1bb5 100644
--- a/app/controllers/invites_controller.rb
+++ b/app/controllers/invites_controller.rb
@@ -172,24 +172,31 @@ class InvitesController < ApplicationController
   def upload_csv
     guardian.ensure_can_bulk_invite_to_forum!(current_user)
 
-    file = params[:file] || params[:files].first
-    name = params[:name] || File.basename(file.original_filename, ".*")
-    extension = File.extname(file.original_filename)
+    hijack do
+      begin
+        file = params[:file] || params[:files].first
 
-    begin
-      data = if extension.downcase == ".csv"
-        path = Invite.create_csv(file, name)
-        Jobs.enqueue(:bulk_invite, filename: "#{name}#{extension}", current_user_id: current_user.id)
-        { url: path }
-      else
-        failed_json.merge(errors: [I18n.t("bulk_invite.file_should_be_csv")])
+        if File.read(file.tempfile).scan(/\n/).count.to_i > 50000
+          return render json: failed_json.merge(errors: [I18n.t("bulk_invite.max_rows")]), status: 422
+        end
+
+        invites = []
+        CSV.foreach(file.tempfile) do |row|
+          invite_hash = { email: row[0], groups: row[1], topic_id: row[2] }
+          if invite_hash[:email].present?
+            invites.push(invite_hash)
+          end
+        end
+        if invites.present?
+          Jobs.enqueue(:bulk_invite, invites: invites, current_user_id: current_user.id)
+          render json: success_json
+        else
+          render json: failed_json.merge(errors: [I18n.t("bulk_invite.error")]), status: 422
+        end
+      rescue
+        render json: failed_json.merge(errors: [I18n.t("bulk_invite.error")]), status: 422
       end
-    rescue
-      failed_json.merge(errors: [I18n.t("bulk_invite.error")])
     end
-    MessageBus.publish("/uploads/csv", data.as_json, user_ids: [current_user.id])
-
-    render json: success_json
   end
 
   def fetch_username
diff --git a/app/jobs/regular/bulk_invite.rb b/app/jobs/regular/bulk_invite.rb
index 31f00df..7c3d1a6 100644
--- a/app/jobs/regular/bulk_invite.rb
+++ b/app/jobs/regular/bulk_invite.rb
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-require 'csv'
 require_dependency 'system_message'
 
 module Jobs
@@ -18,48 +17,38 @@ module Jobs
     end
 
     def execute(args)
-      filename = args[:filename]
-      raise Discourse::InvalidParameters.new(:filename) if filename.blank?
+      invites = args[:invites]
+      raise Discourse::InvalidParameters.new(:invites) if invites.blank?
 
       @current_user = User.find_by(id: args[:current_user_id])
       raise Discourse::InvalidParameters.new(:current_user_id) unless @current_user
       @guardian = Guardian.new(@current_user)
 
-      csv_path = "#{Invite.base_directory}/#{filename}"
-
-      # read csv file, and send out invitations
-      read_csv_file(csv_path)
+      process_invites(invites)
     ensure
-      # send notification to user regarding progress
       notify_user
-      File.delete(csv_path) if csv_path
     end
 
     private
 
-    def read_csv_file(csv_path)
-      file = File.open(csv_path, encoding: 'bom|utf-8')
-      CSV.new(file).each do |csv_info|
-        if csv_info[0]
-          if (EmailValidator.email_regex =~ csv_info[0])
-            # email is valid
-            send_invite(csv_info, $INPUT_LINE_NUMBER)
-            @sent += 1
-          else
-            # invalid email
-            save_log "Invalid Email '#{csv_info[0]}' at line number '#{$INPUT_LINE_NUMBER}'"
-            @failed += 1
-          end
+    def process_invites(invites)
+      invites.each do |invite|
+        if (EmailValidator.email_regex =~ invite[:email])
+          # email is valid
+          send_invite(invite)
+          @sent += 1
+        else
+          # invalid email
+          save_log "Invalid Email '#{invite[:email]}"
+          @failed += 1
         end
       end
     rescue Exception => e
       save_log "Bulk Invite Process Failed -- '#{e.message}'"
       @failed += 1
-    ensure
-      file&.close
     end
 
-    def get_groups(group_names, csv_line_number)
+    def get_groups(group_names)
       groups = []
 
       if group_names
@@ -73,7 +62,7 @@ module Jobs
             groups.push(group)
           else
             # invalid group
-            save_log "Invalid Group '#{group_name}' at line number '#{csv_line_number}'"
+            save_log "Invalid Group '#{group_name}'"
             @failed += 1
           end
         }
@@ -82,13 +71,13 @@ module Jobs
       groups
     end
 
-    def get_topic(topic_id, csv_line_number)
+    def get_topic(topic_id)
       topic = nil
 
       if topic_id
         topic = Topic.find_by_id(topic_id)
         if topic.nil?
-          save_log "Invalid Topic ID '#{topic_id}' at line number '#{csv_line_number}'"
+          save_log "Invalid Topic ID '#{topic_id}'"
           @failed += 1
         end
       end
@@ -96,10 +85,10 @@ module Jobs
       return topic
     end
 
-    def send_invite(csv_info, csv_line_number)
-      email = csv_info[0]
-      groups = get_groups(csv_info[1], csv_line_number)
-      topic = get_topic(csv_info[2], csv_line_number)
+    def send_invite(invite)
+      email = invite[:email]
+      groups = get_groups(invite[:groups])
+      topic = get_topic(invite[:topic_id])
 
       begin
         if user = User.find_by_email(email)
@@ -171,7 +160,5 @@ module Jobs
 
       result
     end
-
   end
-
 end
diff --git a/app/models/invite.rb b/app/models/invite.rb
index 0f23e58..1a909a2 100644
--- a/app/models/invite.rb
+++ b/app/models/invite.rb
@@ -242,14 +242,6 @@ class Invite < ActiveRecord::Base
   def self.base_directory
     File.join(Rails.root, "public", "uploads", "csv", RailsMultisite::ConnectionManagement.current_db)
   end
-
-  def self.create_csv(file, name)
-    extension = File.extname(file.original_filename)
-    path = "#{Invite.base_directory}/#{name}#{extension}"
-    FileUtils.mkdir_p(Pathname.new(path).dirname)
-    File.open(path, "wb") { |f| f << file.tempfile.read }
-    path
-  end
 end
 
 # == Schema Information
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 31674a4..0753105 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -218,6 +218,7 @@ en:
 
   bulk_invite:
     file_should_be_csv: "The uploaded file should be of csv format."
+    max_rows: "Maximum of 50,000 invites can be sent at a time. Try splitting the file in smaller parts."
     error: "There was an error uploading that file. Please try again later."
 
   topic_invite:
diff --git a/spec/fixtures/csv/bulk_invite.csv b/spec/fixtures/csv/bulk_invite.csv
deleted file mode 100644
index 9d0d3b0..0000000
--- a/spec/fixtures/csv/bulk_invite.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-test2@discourse.org
-test@discourse.org,GROUP1;group2,999
diff --git a/spec/jobs/bulk_invite_spec.rb b/spec/jobs/bulk_invite_spec.rb
index fb4e49d..d271df7 100644
--- a/spec/jobs/bulk_invite_spec.rb
+++ b/spec/jobs/bulk_invite_spec.rb
@@ -10,30 +10,22 @@ describe Jobs::BulkInvite do
     fab!(:group2) { Fabricate(:group, name: 'group2') }
     fab!(:topic) { Fabricate(:topic, id: 999) }
     let(:email) { "test@discourse.org" }
-    let(:basename) { "bulk_invite.csv" }
-    let(:filename) { "#{Invite.base_directory}/#{basename}" }
+    let(:invites) { [{ email: 'test2@discourse.org' }, { email: 'test@discourse.org', groups: 'GROUP1;group2', topic_id: 999 }] }
 
-    before do
-      Invite.create_csv(
-        fixture_file_upload("#{Rails.root}/spec/fixtures/csv/#{basename}"),
-        "bulk_invite"
-      )
-    end
-
-    it 'raises an error when the filename is missing' do
+    it 'raises an error when the invites array is missing' do
       expect { Jobs::BulkInvite.new.execute(current_user_id: user.id) }
-        .to raise_error(Discourse::InvalidParameters, /filename/)
+        .to raise_error(Discourse::InvalidParameters, /invites/)
     end
 

[... diff too long, it was truncated ...]

GitHub sha: e7fe7010

#2

Hmm this reads the entire file into memory and then we’re reading the file again with CSV.foreach. Perhaps we can just process it line by line until we hit 50,000 rows max.

#3

Do we need to form this hash of the email is not present?

1 Like
#4

We generally want to avoid using rescue without a specific error since it makes debugging any issues really hard.

1 Like
#5

Do we need to clean up this directory since it is no longer being used?

#6

Do we need to update the controller tests for the changes made here?

#7

I was initially thinking about this approach as well. Let’s say the the file has 60,000 rows and we have already iterated 50,000 rows should we process those 50,000 invites only and notify user that we have sent out invites to first 50,000 rows and discarded the rest? Or should we discard them all?

Sending partial invites (first 50,000) might be confusing for end user.

#8

Yes, it’s on my todo list for this week.

#9

I have already updated controller tests to accommodate for the changes I made. I’ll look into adding a test for max rows limit we added.

#10

Hmm are the tests in another commit?

#11

Not sure but I think reading the entire file into memory and then reading it line by line again doesn’t seem ideal.

#12

Oh wait, the tests for controller did not require any change since they are only checking for a job being queued to process invites. They have a scope for improvement with these recent changes, will do.

Follow Up #13
#15

This commit has been mentioned on Discourse Meta. There might be relevant details there:

#16

Followed up in: