2020-04-22 19:07:51 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Projects
|
|
|
|
module Prometheus
|
|
|
|
module Alerts
|
2021-02-22 17:27:13 +05:30
|
|
|
class NotifyService
|
2020-04-22 19:07:51 +05:30
|
|
|
include Gitlab::Utils::StrongMemoize
|
2020-07-28 23:09:34 +05:30
|
|
|
include ::IncidentManagement::Settings
|
2021-12-11 22:18:48 +05:30
|
|
|
include ::AlertManagement::Responses
|
2020-04-22 19:07:51 +05:30
|
|
|
|
2020-06-23 00:09:42 +05:30
|
|
|
# This set of keys identifies a payload as a valid Prometheus
|
|
|
|
# payload and thus processable by this service. See also
|
|
|
|
# https://prometheus.io/docs/alerting/configuration/#webhook_config
|
|
|
|
# Held as a frozen Set: .processable? runs a subset check of these keys
# against the keys of the incoming payload.
REQUIRED_PAYLOAD_KEYS = Set.new(
  %w[
    version groupKey status receiver groupLabels commonLabels
    commonAnnotations externalURL alerts
  ]
).freeze
|
|
|
|
|
|
|
|
# The only value of the payload's 'version' key that .processable? accepts.
SUPPORTED_VERSION = '4'
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# If the feature flag :prometheus_notify_max_alerts is enabled, truncate
# the alerts to the first 100 and process only those.
# If the feature flag is disabled, process any number of alerts.
|
|
|
|
#
|
|
|
|
# This is to mitigate incident:
|
|
|
|
# https://gitlab.com/gitlab-com/gl-infra/production/-/issues/6086
|
|
|
|
# Threshold used by #max_alerts_exceeded? and the number of leading alerts
# that #truncate_alerts! keeps in the payload.
PROCESS_MAX_ALERTS = 100
|
|
|
|
|
2021-02-22 17:27:13 +05:30
|
|
|
# Stores the collaborators the service works on.
#
# @param project the project the notification belongs to; exposed through
#   the private #project reader
# @param payload the notification payload; exposed through the private
#   #payload reader (presumably a hash-like object - it is later read with
#   #keys and #[])
def initialize(project, payload)
  @payload = payload
  @project = project
end
|
|
|
|
|
|
|
|
# Validates, authorizes and processes the notification payload.
#
# The checks run in a fixed order and the first failing one decides the
# result:
#   1. payload size        -> bad_request
#   2. payload keys/version -> unprocessable_entity
#   3. token               -> unauthorized
# (these response helpers are not defined in this class; presumably they
# come from ::AlertManagement::Responses).
#
# When every check passes, the alerts are truncated if the maximum is
# exceeded, each remaining alert is processed, and the collected results
# are wrapped by #alert_response.
#
# @param token the token supplied with the request; may be nil
# @param integration optional integration whose token is accepted as well
#   when the integration is active
# @return whatever the selected response helper or #alert_response returns
def execute(token, integration = nil)
  return bad_request unless valid_payload_size?
  return unprocessable_entity unless self.class.processable?(payload)
  return unauthorized unless valid_alert_manager_token?(token, integration)

  # Truncation mutates the payload, so it has to happen before processing.
  truncate_alerts! if max_alerts_exceeded?

  alert_response(process_prometheus_alerts)
end
|
|
|
|
|
2021-02-22 17:27:13 +05:30
|
|
|
# Tells whether +payload+ can be handled by this service: its keys must
# include every entry of REQUIRED_PAYLOAD_KEYS and its 'version' must
# equal SUPPORTED_VERSION.
#
# @param payload object responding to #keys and #[]; may be nil
# @return [Boolean]
def self.processable?(payload)
  # Workaround for https://gitlab.com/gitlab-org/gitlab/-/issues/220496
  return false unless payload

  supplied_keys = payload.keys.to_set
  supplied_keys.superset?(REQUIRED_PAYLOAD_KEYS) && payload['version'] == SUPPORTED_VERSION
end
|
|
|
|
|
2020-04-22 19:07:51 +05:30
|
|
|
private
|
|
|
|
|
2021-02-22 17:27:13 +05:30
|
|
|
# Readers for the two values assigned in #initialize.
attr_reader :project, :payload
|
|
|
|
|
2020-04-22 19:07:51 +05:30
|
|
|
# Hands the payload to Gitlab::Utils::DeepSize and returns the result of
# its #valid? check.
def valid_payload_size?
  size_check = Gitlab::Utils::DeepSize.new(payload)
  size_check.valid?
end
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# Tells whether the payload carries more alerts than PROCESS_MAX_ALERTS.
# Always false while the :prometheus_notify_max_alerts ops feature flag is
# not enabled for the project.
def max_alerts_exceeded?
  if Feature.enabled?(:prometheus_notify_max_alerts, project, type: :ops)
    alerts.size > PROCESS_MAX_ALERTS
  else
    false
  end
end
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# Logs a warning (project id, total number of alerts, allowed maximum) and
# then replaces payload['alerts'] with its first PROCESS_MAX_ALERTS
# entries. The payload is modified in place.
def truncate_alerts!
  Gitlab::AppLogger.warn(
    message: 'Prometheus payload exceeded maximum amount of alerts. Truncating alerts.',
    project_id: project.id,
    alerts: { total: alerts.size, max: PROCESS_MAX_ALERTS }
  )

  payload['alerts'] = alerts.first(PROCESS_MAX_ALERTS)
end
|
|
|
|
|
|
|
|
# The value stored under the payload's 'alerts' key. It is read from the
# payload on every call, so it reflects the truncation done by
# #truncate_alerts!.
def alerts
  payload['alerts']
end
|
|
|
|
|
2021-02-22 17:27:13 +05:30
|
|
|
# Accepts the token if any of the three strategies accepts it. They are
# tried in this order and evaluation stops at the first truthy result:
# manual configuration, alerts endpoint integration, cluster integration.
#
# @param token the token supplied with the request; may be nil
# @param integration integration passed to #execute; may be nil
# @return a truthy value when the token is accepted, otherwise false or nil
def valid_alert_manager_token?(token, integration)
  valid_for_manual?(token) ||
    valid_for_alerts_endpoint?(token, integration) ||
    valid_for_cluster?(token)
end
|
|
|
|
|
|
|
|
# Token check for a manually configured Prometheus integration.
#
# Returns false unless the project's 'prometheus' integration reports
# manual_configuration?. Otherwise:
# - with an alerting setting, the token is compared to the setting's token;
# - without one, only a missing (nil) token is accepted.
def valid_for_manual?(token)
  prometheus = project.find_or_initialize_integration('prometheus')
  return false unless prometheus.manual_configuration?

  setting = project.alerting_setting
  return token.nil? unless setting

  compare_token(token, setting.token)
end
|
|
|
|
|
2021-02-22 17:27:13 +05:30
|
|
|
# Token check against the integration handed to #execute. Returns false
# when no integration was given or it is not active; otherwise compares
# the supplied token with the integration's token.
def valid_for_alerts_endpoint?(token, integration)
  if integration&.active?
    compare_token(token, integration.token)
  else
    false
  end
end
|
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
# Token check against the cluster's Prometheus integration.
#
# Returns false when #find_cluster_integration finds nothing. When a token
# was supplied it is compared with the integration's alert manager token;
# when none was supplied, the request is accepted only if the integration
# has no alert manager token either.
def valid_for_cluster?(token)
  cluster_integration = find_cluster_integration(project)
  return false unless cluster_integration

  expected_token = cluster_integration.alert_manager_token
  return expected_token.nil? unless token

  compare_token(token, expected_token)
end
|
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
# Resolves the Prometheus integration of the cluster that sits behind the
# environment of the alert named by the first payload alert's
# 'gitlab_alert_id' label.
#
# Returns nil when the label is absent, no alert is found, the
# environment's deployment platform or its cluster is missing, the cluster
# is not enabled, or integration_prometheus_available? is false.
def find_cluster_integration(project)
  alert_id = gitlab_alert_id
  alert = find_alert(project, alert_id) if alert_id
  return unless alert

  cluster = alert.environment.deployment_platform&.cluster

  cluster.integration_prometheus if cluster&.enabled? && cluster.integration_prometheus_available?
end
|
|
|
|
|
|
|
|
# Returns the first result of Projects::Prometheus::AlertsFinder for the
# given project and metric, or nil when there is none.
def find_alert(project, metric)
  finder = Projects::Prometheus::AlertsFinder.new(project: project, metric: metric)
  finder.execute.first
end
|
|
|
|
|
|
|
|
# Reads labels.gitlab_alert_id from the first alert of the payload.
# Returns nil when there are no alerts or the label is not present.
def gitlab_alert_id
  first_alert = alerts&.first
  first_alert&.dig('labels', 'gitlab_alert_id')
end
|
|
|
|
|
|
|
|
# Compares two tokens with ActiveSupport::SecurityUtils.secure_compare.
# Returns nil, without comparing, when either token is nil or false.
def compare_token(expected, actual)
  if expected && actual
    ActiveSupport::SecurityUtils.secure_compare(expected, actual)
  end
end
|
|
|
|
|
2020-05-24 23:13:21 +05:30
|
|
|
# Runs AlertManagement::ProcessPrometheusAlertService once per alert in
# the payload (each alert converted with #to_h) and returns the service
# results in the same order.
def process_prometheus_alerts
  alerts.map { |alert| AlertManagement::ProcessPrometheusAlertService.new(project, alert.to_h).execute }
end
|
|
|
|
|
2021-12-11 22:18:48 +05:30
|
|
|
# Collects payload[:alerts] from every per-alert response, flattens the
# result one level, drops nil entries and passes the list to +success+
# (not defined in this class; presumably from ::AlertManagement::Responses).
def alert_response(alert_responses)
  processed = alert_responses.flat_map { |response| response.payload[:alerts] }.compact

  success(processed)
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|