FEATURE: new 'trim_incoming_emails' site setting (#12874)

FEATURE: new ‘trim_incoming_emails’ site setting (#12874)

This setting allows admin to de/activate automatic trimming of incoming email. There are instances where it does wonders in trimming all the garbage content and other instances where it’s so bad that it trims the most important part of the email.

FIX: don’t remove hidden content using the style attribute when converting HTML to Markdown. The regexp used was doing more harm than good. It was way too broad.

FIX: properly elide signatures from emails sent with Front App. This is fairly safe as Front App nicely identifies signatures in the HTML part.

diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 66589a8..880ff97 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -1995,6 +1995,7 @@ en:
 
     forwarded_emails_behaviour: "How to treat a forwarded email to Discourse"
     always_show_trimmed_content: "Always show trimmed part of incoming emails. WARNING: might reveal email addresses."
+    trim_incoming_emails: "Trim part of the incoming emails that isn't relevant."
     private_email: "Don't include content from posts or topics in email title or email body. NOTE: also disables digest emails."
     email_total_attachment_size_limit_kb: "Max total size of files attached to outgoing emails in kB. Set to 0 to disable sending of attachments."
     post_excerpts_in_emails: "In notification emails, always send excerpts instead of full posts"
diff --git a/config/site_settings.yml b/config/site_settings.yml
index beacd3a..fdbd133 100644
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@@ -1190,6 +1190,7 @@ email:
       - quote
       - create_replies
   always_show_trimmed_content: false
+  trim_incoming_emails: true
   private_email: false
   email_custom_template:
     default: ""
diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb
index ed2b8c8..bd32867 100644
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -63,24 +63,27 @@ module Email
 
     def process!
       return if is_blocked?
+
       id_hash = Digest::SHA1.hexdigest(@message_id)
+
       DistributedMutex.synchronize("process_email_#{id_hash}") do
         begin
-
-          # If we find an existing incoming email record with the exact same
-          # message_id do not create a new IncomingEmail record to avoid double
-          # ups.
-          @incoming_email = find_existing_and_update_imap
-          return if @incoming_email
+          # If we find an existing incoming email record with the exact same `message_id`
+          # do not create a new `IncomingEmail` record to avoid double ups.
+          return if @incoming_email = find_existing_and_update_imap
 
           ensure_valid_address_lists
           ensure_valid_date
+
           @from_email, @from_display_name = parse_from_field
           @from_user = User.find_by_email(@from_email)
           @incoming_email = create_incoming_email
+
           post = process_internal
+
           raise BouncedEmailError if is_bounce?
-          return post
+
+          post
         rescue Exception => e
           error = e.to_s
           error = e.class.name if error.blank?
@@ -92,13 +95,10 @@ module Email
     end
 
     def find_existing_and_update_imap
-      incoming_email = IncomingEmail.find_by(message_id: @message_id)
-      return if !incoming_email
+      return unless incoming_email = IncomingEmail.find_by(message_id: @message_id)
 
       # If we are not doing this for IMAP purposes just return the record.
-      if @opts[:imap_uid].blank?
-        return incoming_email
-      end
+      return incoming_email if @opts[:imap_uid].blank?
 
       # If the message_id matches the post id regexp then we
       # generated the message_id not the imap server, e.g. in GroupSmtpEmail,
@@ -117,6 +117,7 @@ module Email
         imap_group_id: @opts[:imap_group_id],
         imap_sync: false
       )
+
       incoming_email
     end
 
@@ -440,6 +441,7 @@ module Email
       [:protonmail, /class="protonmail_/],
       [:zimbra, /data-marker="__/],
       [:newton, /(id|class)="cm_/],
+      [:front, /class="front-/],
     ]
 
     def extract_from_gmail(doc)
@@ -501,8 +503,14 @@ module Email
       to_markdown(doc.to_html, elided.to_html)
     end
 
+    def extract_from_front(doc)
+      # Removes anything that has a class starting with 'front-'
+      elided = doc.css("*[class^='front-']").remove
+      to_markdown(doc.to_html, elided.to_html)
+    end
+
     def trim_reply_and_extract_elided(text)
-      return [text, ""] if @opts[:skip_trimming]
+      return [text, ""] if @opts[:skip_trimming] || !SiteSetting.trim_incoming_emails
       EmailReplyTrimmer.trim(text, true)
     end
 
@@ -517,8 +525,7 @@ module Email
       encodings = COMMON_ENCODINGS.dup
       encodings.unshift(mail_part.charset) if mail_part.charset.present?
 
-      # mail (>=2.5) decodes mails with 8bit transfer encoding to utf-8, so
-      # always try UTF-8 first
+      # mail (>=2.5) decodes mails with 8bit transfer encoding to utf-8, so always try UTF-8 first
       if mail_part.content_transfer_encoding == "8bit"
         encodings.delete("UTF-8")
         encodings.unshift("UTF-8")
diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb
index 7e3fb31..dbb7f22 100644
--- a/lib/html_to_markdown.rb
+++ b/lib/html_to_markdown.rb
@@ -37,11 +37,8 @@ class HtmlToMarkdown
     @doc.traverse { |node| node.remove if !allowed.include?(node.name) }
   end
 
-  HIDDEN_STYLES ||= /(display\s*:\s*none)|(visibility\s*:\s*hidden)|(opacity\s*:\s*0)|(transform\s*:\s*scale\(0\))|((width|height)\s*:\s*0)/i
-
   def remove_hidden!(doc)
     @doc.css("[hidden]").remove
-    @doc.css("[style]").each { |n| n.remove if n["style"][HIDDEN_STYLES] }
     @doc.css("img[width]").each { |n| n.remove if n["width"].to_i <= 0 }
     @doc.css("img[height]").each { |n| n.remove if n["height"].to_i <= 0 }
   end
diff --git a/spec/components/email/receiver_spec.rb b/spec/components/email/receiver_spec.rb
index 6613f03..b3b5a08 100644
--- a/spec/components/email/receiver_spec.rb
+++ b/spec/components/email/receiver_spec.rb
@@ -530,16 +530,22 @@ describe Email::Receiver do
 
     it "doesn't include the 'elided' part of the original message when always_show_trimmed_content is disabled" do
       SiteSetting.always_show_trimmed_content = false
-      expect { process(:original_message) }.to change { topic.posts.count }.from(1).to(2)
+      expect { process(:original_message) }.to change { topic.posts.count }
       expect(topic.posts.last.raw).to eq("This is a reply :)")
     end
 
     it "adds the 'elided' part of the original message for public replies when always_show_trimmed_content is enabled" do
       SiteSetting.always_show_trimmed_content = true
-      expect { process(:original_message) }.to change { topic.posts.count }.from(1).to(2)
+      expect { process(:original_message) }.to change { topic.posts.count }
       expect(topic.posts.last.raw).to eq("This is a reply :)\n\n<details class='elided'>\n<summary title='Show trimmed content'>&#183;&#183;&#183;</summary>\n\n---Original Message---\nThis part should not be included\n\n</details>")
     end
 
+    it "doesn't trim the message when trim_incoming_emails is disabled" do
+      SiteSetting.trim_incoming_emails = false
+      expect { process(:original_message) }.to change { topic.posts.count }
+      expect(topic.posts.last.raw).to eq("This is a reply :)\n\n---Original Message---\nThis part should not be included")
+    end
+
     it "supports attached images in TEXT part" do
       SiteSetting.incoming_email_prefer_html = false
 
diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb
index d4c71bb..b5db6e7 100644
--- a/spec/components/html_to_markdown_spec.rb
+++ b/spec/components/html_to_markdown_spec.rb
@@ -83,7 +83,6 @@ describe HtmlToMarkdown do
   end
 
   it "skips hidden tags" do
-    expect(html_to_markdown(%Q{<p>Hello <span style="display: none">cruel </span>World!</p>})).to eq("Hello World!")
     expect(html_to_markdown(%Q{<p>Hello <span hidden>cruel </span>World!</p>})).to eq("Hello World!")
   end
 
@@ -157,8 +156,6 @@ describe HtmlToMarkdown do
   it "skips hidden <img>" do
     expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" width=0>})).to eq("")
     expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" height="0">})).to eq("")
-    expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" style="width: 0">})).to eq("")

[... diff too long, it was truncated ...]

GitHub sha: cd93d1b5

This commit appears in #12874 which was approved by pmusaraj. It was merged by ZogStriP.