User export: profile as json, export auth token logs (#10819)

User export: profile as json, export auth token logs (#10819)

  • FEATURE: Export the entire user profile as json, not just bio/website

  • FEATURE: Add session log information to user export

Even though the columns are named ‘auth_token’ etc., their contents are hashed values that cannot actually be used to log into the forum. They are nevertheless truncated in the export, to rule out any ‘token hash cracking’ attempts.

diff --git a/app/jobs/regular/export_user_archive.rb b/app/jobs/regular/export_user_archive.rb
index f2e77b0..fe5d0ea 100644
--- a/app/jobs/regular/export_user_archive.rb
+++ b/app/jobs/regular/export_user_archive.rb
@@ -12,7 +12,9 @@ module Jobs
 
     COMPONENTS ||= %w(
       user_archive
-      user_archive_profile
+      preferences
+      auth_tokens
+      auth_token_logs
       badges
       bookmarks
       category_preferences
@@ -22,6 +24,8 @@ module Jobs
     HEADER_ATTRS_FOR ||= HashWithIndifferentAccess.new(
       user_archive: ['topic_title', 'categories', 'is_pm', 'post', 'like_count', 'reply_count', 'url', 'created_at'],
       user_archive_profile: ['location', 'website', 'bio', 'views'],
+      auth_tokens: ['id', 'auth_token_hash', 'prev_auth_token_hash', 'auth_token_seen', 'client_ip', 'user_agent', 'seen_at', 'rotated_at', 'created_at', 'updated_at'],
+      auth_token_logs: ['id', 'action', 'user_auth_token_id', 'client_ip', 'auth_token_hash', 'created_at', 'path', 'user_agent'],
       badges: ['badge_id', 'badge_name', 'granted_at', 'post_id', 'seq', 'granted_manually', 'notification_id', 'featured_rank'],
       bookmarks: ['post_id', 'topic_id', 'post_number', 'link', 'name', 'created_at', 'updated_at', 'reminder_type', 'reminder_at', 'reminder_last_sent_at', 'reminder_set_at', 'auto_delete_preference'],
       category_preferences: ['category_id', 'category_names', 'notification_level', 'dismiss_new_timestamp'],
@@ -38,12 +42,15 @@ module Jobs
       COMPONENTS.each do |name|
         h = { name: name, method: :"#{name}_export" }
         h[:filetype] = :csv
-        filename_method = :"#{name}_filename"
-        if respond_to? filename_method
-          h[:filename] = public_send(filename_method)
-        else
-          h[:filename] = name
+        filetype_method = :"#{name}_filetype"
+        if respond_to? filetype_method
+          h[:filetype] = public_send(filetype_method)
+        end
+        condition_method = :"include_#{name}?"
+        if respond_to? condition_method
+          h[:skip] = !public_send(condition_method)
         end
+        h[:filename] = name
         components.push(h)
       end
 
@@ -61,12 +68,17 @@ module Jobs
       zip_filename = nil
       begin
         components.each do |component|
+          next if component[:skip]
           case component[:filetype]
           when :csv
             CSV.open("#{dirname}/#{component[:filename]}.csv", "w") do |csv|
               csv << get_header(component[:name])
               public_send(component[:method]) { |d| csv << d }
             end
+          when :json
+            File.open("#{dirname}/#{component[:filename]}.json", "w") do |file|
+              file.write MultiJson.dump(public_send(component[:method]), indent: 4)
+            end
           else
             raise 'unknown export filetype'
           end
@@ -132,6 +144,59 @@ module Jobs
       end
     end
 
+    def preferences_export
+      UserSerializer.new(@current_user, scope: guardian) # returns the serializer itself (no rows are yielded); the :json writer hands it to MultiJson.dump
+    end
+
+    def preferences_filetype
+      :json # picked up via the "#{name}_filetype" lookup, replacing the default :csv for this component
+    end
+
+    def auth_tokens_export
+      return enum_for(:auth_tokens) unless block_given?
+
+      UserAuthToken
+        .where(user_id: @current_user.id)
+        .each do |token|
+        yield [
+          token.id,
+          token.auth_token.to_s[0..4] + "...", # hashed and truncated
+          token.prev_auth_token[0..4] + "...",
+          token.auth_token_seen,
+          token.client_ip,
+          token.user_agent,
+          token.seen_at,
+          token.rotated_at,
+          token.created_at,
+          token.updated_at,
+        ]
+      end
+    end
+
+    def include_auth_token_logs?
+      # NOTE(review): gated on existing rows rather than SiteSetting.verbose_auth_token_logging, presumably so logs written while the setting was on still export; confirm
+      UserAuthTokenLog.where(user_id: @current_user.id).exists? # when false, the component is flagged :skip and no file is written for it
+    end
+
+    def auth_token_logs_export
+      return enum_for(:auth_token_logs) unless block_given?
+
+      UserAuthTokenLog
+        .where(user_id: @current_user.id)
+        .each do |log|
+        yield [
+          log.id,
+          log.action,
+          log.user_auth_token_id,
+          log.client_ip,
+          log.auth_token.to_s[0..4] + "...", # hashed and truncated
+          log.created_at,
+          log.path,
+          log.user_agent,
+        ]
+      end
+    end
+
     def badges_export
       return enum_for(:badges_export) unless block_given?
 
diff --git a/spec/jobs/export_user_archive_spec.rb b/spec/jobs/export_user_archive_spec.rb
index 4d305d7..7327429 100644
--- a/spec/jobs/export_user_archive_spec.rb
+++ b/spec/jobs/export_user_archive_spec.rb
@@ -26,6 +26,10 @@ describe Jobs::ExportUserArchive do
     [data_rows, csv_out]
   end
 
+  def make_component_json
+    JSON.parse(MultiJson.dump(job.public_send(:"#{component}_export"))) # dump, then re-parse: examples get plain string-keyed JSON data, not the export object
+  end
+
   context '#execute' do
     let(:post) { Fabricate(:post, user: user) }
 
@@ -33,6 +37,11 @@ describe Jobs::ExportUserArchive do
       _ = post
       user.user_profile.website = 'https://doe.example.com/john'
       user.user_profile.save
+      # force a UserAuthTokenLog entry
+      Discourse.current_user_provider.new({
+        'HTTP_USER_AGENT' => 'MyWebBrowser',
+        'REQUEST_PATH' => '/some_path/456852',
+      }).log_on_user(user, {}, {})
     end
 
     after do
@@ -143,20 +152,57 @@ describe Jobs::ExportUserArchive do
     end
   end
 
-  context 'user_archive_profile' do
-    let(:component) { 'user_archive_profile' }
+  context 'preferences' do
+    let(:component) { 'preferences' }
 
     before do
       user.user_profile.website = 'https://doe.example.com/john'
       user.user_profile.bio_raw = "I am John Doe\n\nHere I am"
       user.user_profile.save
+      user.user_option.text_size = :smaller
+      user.user_option.automatically_unpin_topics = false
+      user.user_option.save
     end
 
     it 'properly includes the profile fields' do
-      _, csv_out = make_component_csv
+      serializer = job.preferences_export
+      # puts MultiJson.dump(serializer, indent: 4)
+      output = make_component_json
+      payload = output['user']
+
+      expect(payload['website']).to match('doe.example.com')
+      expect(payload['bio_raw']).to match("Doe\n\nHere")
+      expect(payload['user_option']['automatically_unpin_topics']).to eq(false)
+      expect(payload['user_option']['text_size']).to eq('smaller')
+    end
+  end
+
+  context 'auth tokens' do
+    let(:component) { 'auth_tokens' }
 
-      expect(csv_out).to match('doe.example.com')
-      expect(csv_out).to match("Doe\n\nHere")
+    before do
+      Discourse.current_user_provider.new({
+        'HTTP_USER_AGENT' => 'MyWebBrowser',
+        'REQUEST_PATH' => '/some_path/456852',
+      }).log_on_user(user, {}, {}) # presumably creates the session token and its 'generate' log entry that the examples below expect; confirm against the provider
+    end
+
+    it 'properly includes session records' do
+      data, csv_out = make_component_csv
+      expect(data.length).to eq(1)
+
+      expect(data[0]['user_agent']).to eq('MyWebBrowser')
+    end
+
+    context 'auth token logs' do
+      let(:component) { 'auth_token_logs' }
+      it 'includes details such as the path' do
+        data, csv_out = make_component_csv
+        expect(data.length).to eq(1)
+
+        expect(data[0]['action']).to eq('generate')
+        expect(data[0]['path']).to eq('/some_path/456852')
+      end
     end
   end
 

GitHub sha: 68e87bb5

This commit appears in #10819 which was approved by eviltrout. It was merged by riking.