FEATURE: Improve formatting for Slack transcript messages (#70)

FEATURE: Improve formatting for Slack transcript messages (#70)

  • Fix multi-line code blocks
  • Add strikethrough support
  • Fix HTML entities inside code blocks
  • Do not process formatting inside code blocks
  • Ensure links are never created without a URL
  • Replace - with _ in emoji names
diff --git a/lib/discourse_chat/provider/slack/slack_message.rb b/lib/discourse_chat/provider/slack/slack_message.rb
index e1b4828..7b0cbac 100644
--- a/lib/discourse_chat/provider/slack/slack_message.rb
+++ b/lib/discourse_chat/provider/slack/slack_message.rb
@@ -29,11 +29,27 @@ module DiscourseChat::Provider::SlackProvider
     def text
       text = @raw['text'].nil? ? "" : @raw['text']
 
+      pre = {}
+
+      # Extract code blocks and replace with placeholder
+      text = text.gsub(/`‍``(.*?)`‍``/m) do |match|
+        key = "pre:" + SecureRandom.alphanumeric(50)
+        pre[key] = HTMLEntities.new.decode $1
+        "\n`‍``\n#{key}\n`‍``\n"
+      end
+
+      # # Extract inline code and replace with placeholder
+      text = text.gsub(/(?<!`)`([^`]+?)`(?!`)/) do |match|
+        key = "pre:" + SecureRandom.alphanumeric(50)
+        pre[key] = HTMLEntities.new.decode $1
+        "`#{key}`"
+      end
+
       # Format links (don't worry about special cases @ # !)
       text = text.gsub(/<(.*?)>/) do |match|
         group = $1
         parts = group.split('|')
-        link = parts[0].start_with?('@', '#', '!') ? '' : parts[0]
+        link = parts[0].start_with?('@', '#', '!') ? nil : parts[0]
         text = parts.length > 1 ? parts[1] : parts[0]
 
         if parts[0].start_with?('@')
@@ -46,14 +62,35 @@ module DiscourseChat::Provider::SlackProvider
           next "@#{user_name}"
         end
 
-        "[#{text}](#{link})"
+        if link.nil?
+          text
+        elsif link == text
+          "<#{link}>"
+        else
+          "[#{text}](#{link})"
+        end
       end
 
       # Add an extra * to each side for bold
-      text = text.gsub(/\*(.*?)\*/) do |match|
+      text = text.gsub(/\*.*?\*/) do |match|
         "*#{match}*"
       end
 
+      # Add an extra ~ to each side for strikethrough
+      text = text.gsub(/~.*?~/) do |match|
+        "~#{match}~"
+      end
+
+      # Replace emoji - with _
+      text = text.gsub(/:[a-z0-9_-]+:/) do |match|
+        match.gsub("-") { "_" }
+      end
+
+      # Restore pre-formatted code block content
+      pre.each do |key, value|
+        text = text.gsub(key) { value }
+      end
+
       text
     end
 
diff --git a/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb b/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb
index e8e1571..fb5f89f 100644
--- a/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb
+++ b/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb
@@ -333,5 +333,117 @@ RSpec.describe DiscourseChat::Provider::SlackProvider::SlackTranscript do
         expect(first_ui[:text]).to eq(transcript.first_message.raw_text)
       end
     end
+
+    describe "message formatting" do
+      it 'handles code block newlines' do
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "Here is some code`‍``my code\nwith newline`‍``",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq(<<~MD)
+          Here is some code
+          `‍``
+          my code
+          with newline
+          `‍``
+        MD
+      end
+
+      it 'handles multiple code blocks' do
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "Here is some code`‍``my code\nwith newline`‍``and another`‍``some more code`‍``",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq(<<~MD)
+          Here is some code
+          `‍``
+          my code
+          with newline
+          `‍``
+          and another
+          `‍``
+          some more code
+          `‍``
+        MD
+      end
+
+      it 'handles strikethrough' do
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "Some ~strikethrough~",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq("Some ~~strikethrough~~")
+      end
+
+      it 'handles slack links' do
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "A link to <https://google.com|google>, <https://autolinked.com|https://autolinked.com>, <https://notext.com>, <#channel>, <@user>",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq("A link to [google](https://google.com), <https://autolinked.com>, <https://notext.com>, #channel, @user")
+      end
+
+      it 'does not format things inside backticks' do
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "You can strikethrough like `~this~`, bold like `*this*` and link like `[https://example.com](https://example.com)`",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq("You can strikethrough like `~this~`, bold like `*this*` and link like `[https://example.com](https://example.com)`")
+      end
+
+      it 'unescapes html in backticks' do
+        # Because Slack escapes HTML entities, even in backticks
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "The code is `&lt;stuff&gt;`",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq("The code is `<stuff>`")
+      end
+
+      it 'updates emoji dashes to underscores' do
+        # Discourse does not allow dashes in emoji names, so this helps communities have matching custom emojis
+        message = DiscourseChat::Provider::SlackProvider::SlackMessage.new(
+          {
+            "type" => "message",
+            "user" => "U5Z773QLS",
+            "text" => "This is :my-emoji:",
+            "ts" => "1501093331.439776"
+          },
+          transcript
+        )
+        expect(message.text).to eq("This is :my_emoji:")
+      end
+    end
+
   end
 end

GitHub sha: 08e67187

This commit appears in #70, which was approved by ZogStriP. It was merged by davidtaylorhq.