debian-mirror-gitlab/app/services/projects/prometheus/alerts/notify_service.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

169 lines
4.9 KiB
Ruby
Raw Normal View History

2020-04-22 19:07:51 +05:30
# frozen_string_literal: true
module Projects
module Prometheus
module Alerts
2021-02-22 17:27:13 +05:30
class NotifyService
2020-04-22 19:07:51 +05:30
include Gitlab::Utils::StrongMemoize
2020-07-28 23:09:34 +05:30
include ::IncidentManagement::Settings
2021-12-11 22:18:48 +05:30
include ::AlertManagement::Responses
2020-04-22 19:07:51 +05:30
2020-06-23 00:09:42 +05:30
# Top-level keys a payload must contain to be treated as a valid
# Prometheus payload that this service can process. See also
# https://prometheus.io/docs/alerting/configuration/#webhook_config
REQUIRED_PAYLOAD_KEYS = Set.new(
  %w[
    version groupKey status receiver groupLabels commonLabels
    commonAnnotations externalURL alerts
  ]
).freeze

# The only value of the payload's 'version' key that `.processable?` accepts.
SUPPORTED_VERSION = '4'
2022-03-02 08:16:31 +05:30
# Maximum number of alerts processed from a single payload.
#
# If the feature flag :prometheus_notify_max_alerts is enabled, the alerts
# are truncated to the first 100 and only those are processed.
# If the feature flag is disabled, any number of alerts is processed.
#
# This is to mitigate the incident:
# https://gitlab.com/gitlab-com/gl-infra/production/-/issues/6086
PROCESS_MAX_ALERTS = 100
2021-02-22 17:27:13 +05:30
# Stores the target project and the incoming notification payload.
#
# project - the project the notification is processed for.
# payload - the notification payload; this class reads it with string keys
#           (e.g. payload['alerts']).
def initialize(project, payload)
  @project, @payload = project, payload
end
# Validates the payload and the token, then processes every alert.
#
# token       - token supplied with the request; checked by
#               valid_alert_manager_token?.
# integration - optional integration whose token may also authorize the
#               request (defaults to nil).
#
# Returns bad_request when the payload size check fails,
# unprocessable_entity when the payload is not processable, unauthorized
# when no token check passes, and otherwise the result of alert_response.
def execute(token, integration = nil)
  if !valid_payload_size?
    bad_request
  elsif !self.class.processable?(payload)
    unprocessable_entity
  elsif !valid_alert_manager_token?(token, integration)
    unauthorized
  else
    # Truncation only happens when the feature flag allows it.
    truncate_alerts! if max_alerts_exceeded?

    alert_response(process_prometheus_alerts)
  end
end
2021-02-22 17:27:13 +05:30
# Tells whether the payload can be handled by this service: it must be
# present, contain every key in REQUIRED_PAYLOAD_KEYS and declare the
# supported version.
def self.processable?(payload)
  # Workaround for https://gitlab.com/gitlab-org/gitlab/-/issues/220496
  return false unless payload
  return false unless payload.keys.to_set.superset?(REQUIRED_PAYLOAD_KEYS)

  payload['version'] == SUPPORTED_VERSION
end
2020-04-22 19:07:51 +05:30
private
2021-02-22 17:27:13 +05:30
attr_reader :project, :payload
2020-04-22 19:07:51 +05:30
# Delegates the payload size check to Gitlab::Utils::DeepSize.
def valid_payload_size?
  size_check = Gitlab::Utils::DeepSize.new(payload)
  size_check.valid?
end
2022-03-02 08:16:31 +05:30
# True only when the :prometheus_notify_max_alerts ops flag is enabled for
# the project and the payload carries more than PROCESS_MAX_ALERTS alerts.
def max_alerts_exceeded?
  if Feature.enabled?(:prometheus_notify_max_alerts, project, type: :ops)
    alerts.size > PROCESS_MAX_ALERTS
  else
    false
  end
end
2022-03-02 08:16:31 +05:30
# Logs a warning with the received and maximum alert counts, then replaces
# payload['alerts'] with its first PROCESS_MAX_ALERTS entries.
def truncate_alerts!
  alert_counts = { total: alerts.size, max: PROCESS_MAX_ALERTS }

  Gitlab::AppLogger.warn(
    message: 'Prometheus payload exceeded maximum amount of alerts. Truncating alerts.',
    project_id: project.id,
    alerts: alert_counts
  )

  payload['alerts'] = alerts.first(PROCESS_MAX_ALERTS)
end
# Alerts carried by the payload.
#
# The payload is only checked for the presence of the 'alerts' key, so its
# value may still be nil. Callers invoke size, first and map on the result,
# so a nil value is returned as an empty list instead of raising
# NoMethodError.
def alerts
  payload['alerts'] || []
end
2021-02-22 17:27:13 +05:30
# Tries the three token checks in order (manual configuration, alerts
# endpoint integration, cluster integration) and returns the first truthy
# result, or the result of the last check when none passes.
def valid_alert_manager_token?(token, integration)
  manual_result = valid_for_manual?(token)
  return manual_result if manual_result

  endpoint_result = valid_for_alerts_endpoint?(token, integration)
  return endpoint_result if endpoint_result

  valid_for_cluster?(token)
end
# Token check for a manually configured Prometheus integration.
#
# Returns false unless the project's 'prometheus' integration reports
# manual_configuration?. With an alerting setting present the token is
# compared against the setting's token; without one, only a nil token
# is accepted.
def valid_for_manual?(token)
  prometheus_integration = project.find_or_initialize_integration('prometheus')
  return false unless prometheus_integration.manual_configuration?

  alerting_setting = project.alerting_setting
  return token.nil? unless alerting_setting

  compare_token(token, alerting_setting.token)
end
2021-02-22 17:27:13 +05:30
# Token check against the given integration. A missing or inactive
# integration never authorizes the request.
def valid_for_alerts_endpoint?(token, integration)
  if integration&.active?
    compare_token(token, integration.token)
  else
    false
  end
end
2021-06-08 01:23:25 +05:30
# Token check against the cluster integration found for the first alert.
#
# Returns false when no cluster integration is found. When a token is
# given it is compared against the integration's alert_manager_token;
# when no token is given, the check passes only if the integration has
# no token either.
def valid_for_cluster?(token)
  integration = find_cluster_integration(project)
  return false unless integration

  stored_token = integration.alert_manager_token
  return stored_token.nil? unless token

  compare_token(token, stored_token)
end
2021-06-08 01:23:25 +05:30
# Resolves the Prometheus cluster integration for the alert referenced by
# the first alert's 'gitlab_alert_id' label.
#
# Returns nil when the label is absent, no matching alert is found, the
# alert's environment has no enabled cluster, or the cluster reports that
# its Prometheus integration is not available.
def find_cluster_integration(project)
  metric_id = gitlab_alert_id
  prometheus_alert = metric_id && find_alert(project, metric_id)
  return unless prometheus_alert

  cluster = prometheus_alert.environment.deployment_platform&.cluster

  if cluster&.enabled? && cluster.integration_prometheus_available?
    cluster.integration_prometheus
  end
end
# Returns the first result of Projects::Prometheus::AlertsFinder for the
# given project and metric.
def find_alert(project, metric)
  finder = Projects::Prometheus::AlertsFinder.new(project: project, metric: metric)
  finder.execute.first
end
# Reads the 'gitlab_alert_id' label of the first alert; nil when there are
# no alerts or the label is absent.
def gitlab_alert_id
  first_alert = alerts&.first
  first_alert&.dig('labels', 'gitlab_alert_id')
end
# Compares two tokens with ActiveSupport::SecurityUtils.secure_compare.
# Returns nil, without comparing, when either token is missing.
def compare_token(expected, actual)
  if expected && actual
    ActiveSupport::SecurityUtils.secure_compare(expected, actual)
  end
end
2020-05-24 23:13:21 +05:30
# Runs AlertManagement::ProcessPrometheusAlertService once per alert and
# returns the list of service results, in alert order.
def process_prometheus_alerts
  alerts.map do |alert_payload|
    service = AlertManagement::ProcessPrometheusAlertService.new(project, alert_payload.to_h)
    service.execute
  end
end
2021-12-11 22:18:48 +05:30
# Gathers the :alerts entry of every service response payload, drops nil
# entries and wraps the combined list in a success response.
def alert_response(alert_responses)
  processed_alerts = alert_responses.flat_map { |response| response.payload[:alerts] }
  success(processed_alerts.compact)
end
end
end
end
end