FIX: upload watched words should use UTF-8

FIX: upload watched words should use UTF-8

diff --git a/app/controllers/admin/watched_words_controller.rb b/app/controllers/admin/watched_words_controller.rb
index 119562d..3be2ea2 100644
--- a/app/controllers/admin/watched_words_controller.rb
+++ b/app/controllers/admin/watched_words_controller.rb
@@ -25,7 +25,7 @@ class Admin::WatchedWordsController < Admin::AdminController
 
     Scheduler::Defer.later("Upload watched words") do
       begin
-        File.open(file.tempfile, encoding: "ISO-8859-1").each_line do |line|
+        File.open(file.tempfile, encoding: "bom|utf-8").each_line do |line|
           WatchedWord.create_or_update_word(word: line, action_key: action_key) unless line.empty?
         end
         data = { url: '/ok' }
diff --git a/spec/fixtures/csv/words.csv b/spec/fixtures/csv/words.csv
new file mode 100644
index 0000000..646d644
--- /dev/null
+++ b/spec/fixtures/csv/words.csv
@@ -0,0 +1,6 @@
+thread
+线
+धागा
+실
+tråd
+нить
diff --git a/spec/integration/watched_words_spec.rb b/spec/integration/watched_words_spec.rb
index 646084d..6fe25b2 100644
--- a/spec/integration/watched_words_spec.rb
+++ b/spec/integration/watched_words_spec.rb
@@ -179,4 +179,25 @@ describe WatchedWord do
       }.to_not change { PostAction.count }
     end
   end
+
+  describe 'upload' do
+    context 'logged in as admin' do
+      before do
+        sign_in(admin)
+      end
+
+      it 'creates the words from the file' do
+        post '/admin/logs/watched_words/upload.json', params: {
+          action_key: 'flag',
+          file: Rack::Test::UploadedFile.new(file_from_fixtures("words.csv", "csv"))
+        }
+        expect(response.status).to eq(200)
+        expect(WatchedWord.count).to eq(6)
+        expect(WatchedWord.pluck(:word)).to contain_exactly(
+          'thread', '线', 'धागा', '실', 'tråd', 'нить'
+        )
+        expect(WatchedWord.pluck(:action).uniq).to eq([WatchedWord.actions[:flag]])
+      end
+    end
+  end
 end

GitHub sha: 1812a38f

2 Likes

This commit has been mentioned on Discourse Meta. There might be relevant details there:

1 Like

Hmm, will this explode if you upload an ISO-8859-1 file now? How do we even create one of those…

I guess this is better, but I wonder whether we need this whole encoding option at all, rather than just letting Ruby figure out what to do.