FIX: makes bbcode parsing much more resilient by using nokogiri on cooked

FIX: makes bbcode parsing much more resilient by using nokogiri on cooked

diff --git a/app/models/discourse_post_event/event.rb b/app/models/discourse_post_event/event.rb
index 543d89f..c99f9aa 100644
--- a/app/models/discourse_post_event/event.rb
+++ b/app/models/discourse_post_event/event.rb
@@ -175,7 +175,7 @@ module DiscoursePostEvent
     end
 
     def self.update_from_raw(post)
-      events = DiscoursePostEvent::EventParser.extract_events(post.raw)
+      events = DiscoursePostEvent::EventParser.extract_events(post)
       if events.present?
         event_params = events.first
         event = post.event || Event.new(id: post.id)
@@ -184,7 +184,7 @@ module DiscoursePostEvent
           starts_at: event_params[:start] || event.starts_at,
           ends_at: event_params[:end] || event.ends_at,
           status: event_params[:status].present? ? Event.statuses[event_params[:status].to_sym] : event.status,
-          raw_invitees: event_params[:allowedGroups] ? event_params[:allowedGroups].split(',') : nil
+          raw_invitees: event_params[:"allowed-groups"] ? event_params[:"allowed-groups"].split(',') : nil
         }
         event.enforce_utc!(params)
         event.update_with_params!(params)
diff --git a/lib/discourse_post_event/event_parser.rb b/lib/discourse_post_event/event_parser.rb
index c245a4d..777973e 100644
--- a/lib/discourse_post_event/event_parser.rb
+++ b/lib/discourse_post_event/event_parser.rb
@@ -1,34 +1,32 @@
 # frozen_string_literal: true
 
-EVENT_REGEX = /\[wrap=event\s(.*?)\]/m
-EVENT_OPTIONS_REGEX = /(\w+\=".*?")/m
-
 VALID_OPTIONS = [
   :start,
   :end,
   :status,
-  :allowedGroups,
+  :"allowed-groups",
   :name
 ]
 
 module DiscoursePostEvent
   class EventParser
-    def self.extract_events(str)
-      str.scan(EVENT_REGEX).map do |scan|
-        extract_options(scan[0].gsub(/\\/, ''))
-      end.compact
-    end
+    def self.extract_events(post)
+      cooked = PrettyText.cook(post.raw, topic_id: post.topic_id, user_id: post.user_id)
+      valid_options = VALID_OPTIONS.map { |o| "data-#{o}" }
 
-    def self.extract_options(str)
-      options = nil
-      str.scan(EVENT_OPTIONS_REGEX).each do |option|
-        key, value = option[0].split("=")
-        if VALID_OPTIONS.include?(key.to_sym) && value
-          options ||= {}
-          options[key.to_sym] = value.delete('\\"')
+      Nokogiri::HTML(cooked).css('[data-wrap="event"]').map do |doc|
+        event = nil
+        doc.attributes.values.each do |attribute|
+          name = attribute.name
+          value = attribute.value
+
+          if valid_options.include?(name) && value
+            event ||= {}
+            event[name["data-".length..-1].to_sym] = CGI.escapeHTML(value)
+          end
         end
-      end
-      options
+        event
+      end.compact
     end
   end
 end
diff --git a/lib/discourse_post_event/event_validator.rb b/lib/discourse_post_event/event_validator.rb
index d608559..38ad44a 100644
--- a/lib/discourse_post_event/event_validator.rb
+++ b/lib/discourse_post_event/event_validator.rb
@@ -7,7 +7,7 @@ module DiscoursePostEvent
     end
 
     def validate_event
-      extracted_events = DiscoursePostEvent::EventParser::extract_events(@post.raw)
+      extracted_events = DiscoursePostEvent::EventParser::extract_events(@post)
 
       if extracted_events.count == 0
         return false
diff --git a/spec/lib/event_parser_spec.rb b/spec/lib/event_parser_spec.rb
index 109314e..37d0c17 100644
--- a/spec/lib/event_parser_spec.rb
+++ b/spec/lib/event_parser_spec.rb
@@ -2,46 +2,79 @@
 
 require "rails_helper"
 
+def build_post(user, raw)
+  Post.new(user: user, raw: raw)
+end
+
 describe DiscoursePostEvent::EventParser do
   subject { DiscoursePostEvent::EventParser }
 
+  let(:user) { Fabricate(:user) }
+
   it 'works with no event' do
-    events = subject.extract_events('this could be a nice event')
+    events = subject.extract_events(build_post(user, 'this could be a nice event'))
     expect(events.length).to eq(0)
   end
 
   it 'finds one event' do
-    events = subject.extract_events('[wrap=event start="foo" end="bar"]\n[/wrap]')
+    events = subject.extract_events(build_post(user, '[wrap=event start="foo" end="bar"]\n[/wrap]'))
     expect(events.length).to eq(1)
   end
 
   it 'finds multiple events' do
-    events = subject.extract_events('[wrap=event start="foo" end="bar"]\n[/wrap] baz [wrap=event start="foo" end="bar"]\n[/wrap]')
+    post_event = build_post user, <<-TXT
+[wrap=event start="2020"][/wrap]
+
+[wrap=event start="2021"][/wrap]
+    TXT
+
+    events = subject.extract_events(post_event)
     expect(events.length).to eq(2)
   end
 
   it 'parses options' do
-    events = subject.extract_events('[wrap=event start="foo" end="bar"]\n[/wrap]')
+    events = subject.extract_events(build_post(user, '[wrap=event start="foo" end="bar"]\n[/wrap]'))
     expect(events[0][:start]).to eq("foo")
     expect(events[0][:end]).to eq("bar")
   end
 
   it 'works with escaped string' do
-    events = subject.extract_events("I am going to get that fixed.\n\n[wrap=event start=\"bar\"]\n[/wrap]\n\n[wrap=event start=\"foo\"]\n[/wrap]")
+    events = subject.extract_events(build_post(user, "I am going to get that fixed.\n\n[wrap=event start=\"bar\"]\n[/wrap]"))
     expect(events[0][:start]).to eq("bar")
-    expect(events[1][:start]).to eq("foo")
   end
 
   it 'parses options where value has spaces' do
-    events = subject.extract_events('[wrap=event start="foo" name="bar baz"]\n[/wrap]')
+    events = subject.extract_events(build_post(user, '[wrap=event start="foo" name="bar baz"]\n[/wrap]'))
     expect(events[0][:name]).to eq("bar baz")
   end
 
   it 'doesn’t parse invalid options' do
-    events = subject.extract_events("I am going to get that fixed.\n\n[wrap=event start=\"foo\" something=\"bar\"]\n[/wrap]")
+    events = subject.extract_events(build_post(user, "I am going to get that fixed.\n\n[wrap=event start=\"foo\" something=\"bar\"]\n[/wrap]"))
     expect(events[0][:something]).to be(nil)
 
-    events = subject.extract_events("I am going to get that fixed.\n\n[wrap=event something=\"bar\"]\n[/wrap]")
+    events = subject.extract_events(build_post(user, "I am going to get that fixed.\n\n[wrap=event something=\"bar\"]\n[/wrap]"))
+    expect(events).to eq([])
+  end
+
+  it 'doesn’t parse an event in codeblock' do
+    post_event = build_post user, <<-TXT
+      Example event:
+      `‍``
+      [wrap=event start=\"bar\"]\n[/wrap]
+      `‍``
+    TXT
+
+    events = subject.extract_events(post_event)
+
+    expect(events).to eq([])
+  end
+
+  it 'doesn’t parse in blockquote' do
+    post_event = build_post user, <<-TXT
+      [wrap=event start="2020"][/wrap]
+    TXT
+
+    events = subject.extract_events(post_event)
     expect(events).to eq([])
   end
 end

GitHub sha: bcff2fcb

Awesome! I’m always happy to improve resilience.

1 Like

Yes, it’s quite cool: relying only on the cooked HTML solves a lot of issues, such as events written inside code blocks or quotes, syntax errors…

1 Like