FIX: Prevent duplicate alerts for clustered alertmanagers

FIX: Prevent duplicate alerts for clustered alertmanagers

This was happening when the starts_at time varied between cluster members. Now we simply key on (datacenter, id, !resolved).

diff --git a/app/jobs/regular/process_alert.rb b/app/jobs/regular/process_alert.rb
index 963b0ea..5f16ee7 100644
--- a/app/jobs/regular/process_alert.rb
+++ b/app/jobs/regular/process_alert.rb
@@ -201,7 +201,8 @@ module Jobs
 
           stored_alert = new_history.find do |p|
             p['id'] == alert['labels']['id'] &&
-              DateTime.parse(p['starts_at']).to_s == DateTime.parse(alert['startsAt']).to_s
+              p['datacenter'] == datacenter &&
+              p["status"] != "resolved"
           end
 
           alert_description = alert.dig('annotations', 'description')
diff --git a/spec/integration/discourse_prometheus_alert_receiver/receiver_controller_spec.rb b/spec/integration/discourse_prometheus_alert_receiver/receiver_controller_spec.rb
index dbb1716..3ff803e 100644
--- a/spec/integration/discourse_prometheus_alert_receiver/receiver_controller_spec.rb
+++ b/spec/integration/discourse_prometheus_alert_receiver/receiver_controller_spec.rb
@@ -1194,6 +1194,29 @@ RSpec.describe DiscoursePrometheusAlertReceiver::ReceiverController do
           )
         end
 
+        it "does not change the existing topic, even if the start time is different" do
+          # Can happen in a clustered alertmanager setup
+          payload["alerts"].first["startsAt"] = "2020-01-02T03:05:05.87654321Z"
+
+          expect do
+            post "/prometheus/receiver/#{token}", params: payload
+          end.to_not change { topic.reload.posts.first.revisions.count }
+
+          expect(topic.custom_fields[custom_field_key]['alerts']).to eq(
+            [
+              {
+                'id' => "somethingfunny",
+                'starts_at' => "2020-01-02T03:04:05.12345678Z",
+                'graph_url' => "http://alerts.example.com/graph?g0.expr=lolrus",
+                'status' => 'firing',
+                'description' => 'some description',
+                'datacenter' => datacenter,
+                'external_url' => external_url
+              },
+            ]
+          )
+        end
+
         it 'reassigns the alert if topic has no assignee' do
           TopicAssigner.new(topic, Discourse.system_user).unassign
 
@@ -1313,7 +1336,7 @@ RSpec.describe DiscoursePrometheusAlertReceiver::ReceiverController do
                 'id' => 'somethingfunny',
                 'starts_at' => "2020-01-02T03:04:05.12345678Z",
                 'graph_url' => "http://alerts.example.com/graph?g0.expr=lolrus",
-                'status' => "resolved",
+                'status' => "firing",
                 'datacenter' => datacenter
               }
             ]

GitHub sha: a0ad42de

1 Like