debian-mirror-gitlab/lib/gitlab/usage_data_counters/hll_redis_counter.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

329 lines
13 KiB
Ruby
Raw Normal View History

2020-10-24 23:57:45 +05:30
# frozen_string_literal: true
module Gitlab
module UsageDataCounters
module HLLRedisCounter
# Expiry applied to weekly-aggregated keys when the event definition sets no expiry.
DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks
# Expiry applied to daily-aggregated keys when the event definition sets no expiry.
DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days
2021-01-29 00:20:46 +05:30
# Slot value used when an event definition carries no redis_slot.
DEFAULT_REDIS_SLOT = ''

# Base class for every error raised by this counter.
class EventError < StandardError; end

# Event name is not present in the known events definitions.
class UnknownEvent < EventError; end

# Event aggregation is not one of the allowed aggregations.
class UnknownAggregation < EventError; end

# Events queried together do not share one aggregation level.
class AggregationMismatch < EventError; end

# Events queried together do not share one redis slot.
class SlotMismatch < EventError; end

# Events queried together do not share one category.
class CategoryMismatch < EventError; end

# A context outside the valid context list was supplied.
class InvalidContext < EventError; end

# Glob matching every known events definition file next to this source file.
KNOWN_EVENTS_PATH = File.expand_path('known_events/*.yml', __dir__)
2020-10-24 23:57:45 +05:30
# Aggregation levels an event definition may declare.
ALLOWED_AGGREGATIONS = %i[daily weekly].freeze
2021-11-11 11:23:49 +05:30
# Categories for which unique_events_data also reports the combined
# "<category>_total_unique_counts_weekly/monthly" entries.
CATEGORIES_FOR_TOTALS = %w[
  compliance
  error_tracking
  ide_edit
  pipeline_authoring
].freeze
2022-08-13 15:12:31 +05:30
# Categories that are subtracted from the full category list before
# unique_events_data builds its results (see categories_pending_migration).
CATEGORIES_COLLECTED_FROM_METRICS_DEFINITIONS = %w[
  analytics
  ci_users
  deploy_token_packages
  code_review
  ecosystem
  error_tracking
  ide_edit
  importer
  incident_management
  incident_management_alerts
  issues_edit
  kubernetes_agent
  manage
  pipeline_authoring
  quickactions
  search
  secure
  snippets
  source_code
  terraform
  testing
  user_packages
  work_items
].freeze
2020-10-24 23:57:45 +05:30
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
#
# All events should be added to the known_events YAML files in lib/gitlab/usage_data_counters/known_events/
#
# Event example:
#
# - name: g_compliance_dashboard # Unique event name
#   redis_slot: compliance # Optional slot name, if not defined it will use name as a slot, used for totals
#   category: compliance # Group events in categories
#   expiry: 29 # Optional expiration time in days, default value 29 days for daily and 6.weeks for weekly
#   aggregation: daily # Aggregation level, keys are stored daily or weekly
#   feature_flag: # The event feature flag
#
# Usage:
#
# * Track event: Gitlab::UsageDataCounters::HLLRedisCounter.track_event('g_compliance_dashboard', values: user_id)
# * Get unique counts per user: Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_dashboard', start_date: 28.days.ago, end_date: Date.current)
class << self
2020-11-24 15:15:51 +05:30
include Gitlab::Utils::UsageData
2021-09-04 01:27:46 +05:30
include Gitlab::Usage::TimeFrame
2020-11-24 15:15:51 +05:30
2021-03-08 18:12:59 +05:30
# Track unique events
#
# event_name - The event name.
# values - One or multiple values counted.
# time - Time of the action, defaults to Time.current.
def track_event(event_name, values:, time: Time.current)
  track(values, event_name, time: time)
end
2021-03-08 18:12:59 +05:30
# Track unique events within a context
#
# Nothing is tracked when the context is blank or is not part of
# valid_context_list.
#
# event_name - The event name.
# values - One or multiple values counted.
# context - Event context, plan level tracking.
# time - Time of the action, defaults to Time.zone.now.
def track_event_in_context(event_name, values:, context:, time: Time.zone.now)
  return if context.blank?
  return unless context.in?(valid_context_list)

  track(values, event_name, context: context, time: time)
end
2020-10-24 23:57:45 +05:30
2022-06-21 17:19:12 +05:30
# Count unique events for a given time range.
#
# The selected events must share one redis slot, one category and one
# aggregation level, otherwise SlotMismatch, CategoryMismatch or
# AggregationMismatch is raised. A non-blank context outside
# valid_context_list raises InvalidContext.
#
# event_names - The list of the events to count.
# start_date - The start date of the time range.
# end_date - The end date of the time range.
# context - Event context, plan level tracking. Available if set when tracking.
def unique_events(event_names:, start_date:, end_date:, context: '')
  count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date, context: context) do |events|
    raise SlotMismatch, events unless events_in_same_slot?(events)
    raise CategoryMismatch, events unless events_in_same_category?(events)
    raise AggregationMismatch, events unless events_same_aggregation?(events)
    raise InvalidContext if context.present? && !context.in?(valid_context_list)
  end
end
# @return [Array<String>] distinct categories of all known events (memoized)
def categories
  @categories ||= known_events.map { |event| event[:category] }.uniq
end
2022-08-27 11:52:29 +05:30
# Accessor for the CATEGORIES_COLLECTED_FROM_METRICS_DEFINITIONS list.
#
# @return [Array<String>] the frozen constant itself, not a copy
def categories_collected_from_metrics_definitions
  CATEGORIES_COLLECTED_FROM_METRICS_DEFINITIONS
end
2020-11-24 15:15:51 +05:30
# @param category [String, Symbol] the category name (compared via #to_s)
# @return [Array<String>] list of event names for given category
def events_for_category(category)
  known_events.select { |event| event[:category] == category.to_s }.map { |event| event[:name] }
end
2022-06-21 17:19:12 +05:30
# Recent 7 or 28 days unique events data for events defined in /lib/gitlab/usage_data_counters/known_events/
#
# - For metrics for which we store a key per day, we have the last 7 days or last 28 days of data.
# - For metrics for which we store a key per week, we have the last complete week or last 4 complete weeks.
#   Whether an event is stored daily or weekly is declared in its definition under
#   /lib/gitlab/usage_data_counters/known_events/
# - Most of the metrics have weekly aggregation. We recommend this as it generates fewer keys in Redis to store.
# - The aggregation used doesn't affect data granularity.
#
# @return [Hash{String => Hash}] per category, a hash of "<event>_weekly" / "<event>_monthly"
#   counts plus, for eligible categories, "<category>_total_unique_counts_*" entries
def unique_events_data
  categories_pending_migration.each_with_object({}) do |category, category_results|
    events_names = events_for_category(category)

    event_results = events_names.each_with_object({}) do |event, hash|
      # No weekly entry is emitted for this one event.
      hash["#{event}_weekly"] = unique_events(**weekly_time_range.merge(event_names: [event])) unless event == "i_package_composer_deploy_token"
      hash["#{event}_monthly"] = unique_events(**monthly_time_range.merge(event_names: [event]))
    end

    if eligible_for_totals?(events_names) && CATEGORIES_FOR_TOTALS.include?(category)
      event_results["#{category}_total_unique_counts_weekly"] = unique_events(**weekly_time_range.merge(event_names: events_names))
      event_results["#{category}_total_unique_counts_monthly"] = unique_events(**monthly_time_range.merge(event_names: events_names))
    end

    category_results[category.to_s] = event_results
  end
end
2021-03-11 19:13:27 +05:30
# @param event_name [String, Symbol] the event name to look up
# @return [Boolean] whether a definition with that name exists in known_events
def known_event?(event_name)
  event_for(event_name).present?
end
# Event definitions loaded from the files matching KNOWN_EVENTS_PATH.
# The list is loaded on first use and cached afterwards.
def known_events
  return @known_events if @known_events

  @known_events = load_events(KNOWN_EVENTS_PATH)
end
2021-03-11 19:13:27 +05:30
# Count unique values across several events for a time range.
#
# Unlike unique_events, only the redis slot and the aggregation level of
# the events are validated here.
#
# event_names - The list of the events to count.
# start_date - The start date of the time range.
# end_date - The end date of the time range.
def calculate_events_union(event_names:, start_date:, end_date:)
  count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
    raise SlotMismatch, events unless events_in_same_slot?(events)
    raise AggregationMismatch, events unless events_same_aggregation?(events)
  end
end
2020-10-24 23:57:45 +05:30
private
2022-08-13 15:12:31 +05:30
# Categories still reported through unique_events_data: every known category
# minus those listed by categories_collected_from_metrics_definitions.
def categories_pending_migration
  categories - categories_collected_from_metrics_definitions
end
2021-03-08 18:12:59 +05:30
# Add values to the HLL key of an event.
#
# Does nothing when Service Ping is disabled or the event's feature flag
# check fails. An unknown event is reported once through
# Gitlab::ErrorTracking and then skipped.
#
# values - One or multiple values counted.
# event_name - The event name.
# context - Optional context prefix for the key.
# time - Time of the action, defaults to Time.zone.now.
def track(values, event_name, context: '', time: Time.zone.now)
  return unless ::ServicePing::ServicePingSettings.enabled?

  event = event_for(event_name)

  unless event.present?
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(UnknownEvent.new("Unknown event #{event_name}"))
    # Stop here: continuing with a nil event would raise NoMethodError below
    # and report the same problem a second time from the rescue clause.
    return
  end

  return unless feature_enabled?(event)

  Gitlab::Redis::HLL.add(key: redis_key(event, time, context), value: values, expiry: expiry(event))
rescue StandardError => e
  # Ignore any exceptions unless in dev or test env.
  # The application flow should not be blocked by errors in tracking.
  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
2021-03-08 18:12:59 +05:30
# The list of valid contexts for which tracking is allowed
# (whatever Plan.all_plans returns).
def valid_context_list
  Plan.all_plans
end
# Resolve the event definitions, build their Redis keys for the time range
# and count the distinct values stored under those keys.
#
# When a block is given it receives the resolved event definitions before
# any key is built, so callers can validate them (and raise).
#
# Returns FALLBACK when no key falls into the requested range.
def count_unique_events(event_names:, start_date:, end_date:, context: '')
  events = events_for(Array(event_names).map(&:to_s))

  yield events if block_given?

  # All events are expected to share one aggregation; the first one decides.
  aggregation = events.first[:aggregation]

  keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date, context: context)

  return FALLBACK unless keys.any?

  redis_usage_data { Gitlab::Redis::HLL.count(keys: keys) }
end
2021-03-11 19:13:27 +05:30
# Whether tracking is enabled for the given event definition.
#
# Events without a feature flag are always enabled. For events that declare
# one, both that flag and the :redis_hll_tracking ops flag must be enabled.
def feature_enabled?(event)
  return true if event[:feature_flag].blank?

  Feature.enabled?(event[:feature_flag]) && Feature.enabled?(:redis_hll_tracking, type: :ops)
end
2020-11-24 15:15:51 +05:30
# Totals may be added only when more than one event is given and all of
# them share the same redis slot, category and aggregation level.
def eligible_for_totals?(events_names)
  return false unless events_names.size > 1

  definitions = events_for(events_names)

  events_in_same_slot?(definitions) &&
    events_in_same_category?(definitions) &&
    events_same_aggregation?(definitions)
end
2021-01-29 00:20:46 +05:30
# Build the Redis keys covering the time range for the given events.
# :daily aggregation uses daily keys; any other aggregation uses weekly keys.
def keys_for_aggregation(aggregation, events:, start_date:, end_date:, context: '')
  if aggregation.to_sym == :daily
    daily_redis_keys(events: events, start_date: start_date, end_date: end_date, context: context)
  else
    weekly_redis_keys(events: events, start_date: start_date, end_date: end_date, context: context)
  end
end
2021-01-29 00:20:46 +05:30
# Collect the event definitions of every file matching the glob into one
# flat list. Files that yield nil contribute nothing.
def load_events(wildcard)
  Dir[wildcard].flat_map { |path| Array(load_yaml_from_path(path)) }
end
# Parse one YAML definitions file and convert every entry with
# with_indifferent_access. Returns nil when the parsed document is nil.
def load_yaml_from_path(path)
  YAML.safe_load(File.read(path))&.map(&:with_indifferent_access)
end
# @return [Array] the :name of every known event definition, in definition order
def known_events_names
  known_events.map do |definition|
    definition[:name]
  end
end
# Whether all events can be counted together in one redis slot.
#
# An empty list is never valid. A single event always is, since there is
# only one slot to check. Otherwise every event must define a non-blank
# redis_slot equal to the first event's.
def events_in_same_slot?(events)
  return false if events.empty?
  return true if events.size == 1

  slot = events.first[:redis_slot]
  events.all? { |event| event[:redis_slot].present? && event[:redis_slot] == slot }
end
# Whether every event belongs to the first event's category.
# Returns false for an empty list, matching events_in_same_slot?.
def events_in_same_category?(events)
  # Guard: events.first is nil for an empty list and nil[:category] would raise.
  return false if events.empty?

  category = events.first[:category]
  events.all? { |event| event[:category] == category }
end
# Whether every event uses the first event's aggregation level.
# Returns false for an empty list, matching events_in_same_slot?.
def events_same_aggregation?(events)
  # Guard: events.first is nil for an empty list and nil[:aggregation] would raise.
  return false if events.empty?

  aggregation = events.first[:aggregation]
  events.all? { |event| event[:aggregation] == aggregation }
end
# Expiry for the keys of an event: the event's own expiry (in days) when
# set, otherwise the default for its aggregation level.
def expiry(event)
  return event[:expiry].days if event[:expiry].present?

  event[:aggregation].to_sym == :daily ? DEFAULT_DAILY_KEY_EXPIRY_LENGTH : DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH
end
# @param event_name [String, Symbol] the event name (compared via #to_s)
# @return the first known event definition with that name, or nil
def event_for(event_name)
  known_events.find { |event| event[:name] == event_name.to_s }
end
# Known event definitions whose :name is included in event_names,
# returned in definition order.
def events_for(event_names)
  known_events.select do |definition|
    event_names.include?(definition[:name])
  end
end
# The event's redis_slot, or DEFAULT_REDIS_SLOT when it is nil or false.
def redis_slot(event)
  slot = event[:redis_slot]
  return slot if slot

  DEFAULT_REDIS_SLOT
end
# Compose the key in order to store events daily or weekly
#
# The key is built from the slot-marked event name and the time bucket; a
# non-blank context is prepended as "<context>_".
#
# Raises UnknownEvent when the event is not a known event and
# UnknownAggregation when its aggregation is not in ALLOWED_AGGREGATIONS.
def redis_key(event, time, context = '')
  raise UnknownEvent, "Unknown event #{event[:name]}" unless known_events_names.include?(event[:name].to_s)
  raise UnknownAggregation, "Use :daily or :weekly aggregation" unless ALLOWED_AGGREGATIONS.include?(event[:aggregation].to_sym)

  key = apply_slot(event)
  key = apply_time_aggregation(key, time, event)
  key = "#{context}_#{key}" if context.present?
  key
end
# Mark the slot inside the event name with curly braces.
#
# With a slot, every occurrence of the slot text in the name is wrapped in
# braces; without one, the whole name is wrapped.
# NOTE(review): presumably the braces act as a Redis Cluster hash tag so
# related keys land in the same slot — confirm.
def apply_slot(event)
  slot = redis_slot(event)

  if slot.present?
    event[:name].to_s.gsub(slot, "{#{slot}}")
  else
    "{#{event[:name]}}"
  end
end
2020-10-24 23:57:45 +05:30
2021-01-29 00:20:46 +05:30
# Add the time bucket to a key.
#
# Daily keys are prefixed with "<ISO year>-<day of year>"; all other keys are
# suffixed with "<ISO year>-<ISO week>".
# NOTE(review): the daily format pairs the ISO week-based year (%G) with the
# calendar day of year (%j); these disagree for a few days around New Year.
# The format is kept as is because existing keys depend on it — confirm
# whether that is intended.
def apply_time_aggregation(key, time, event)
  if event[:aggregation].to_sym == :daily
    year_day = time.strftime('%G-%j')
    "#{year_day}-#{key}"
  else
    year_week = time.strftime('%G-%V')
    "#{key}-#{year_week}"
  end
end
2021-01-29 00:20:46 +05:30
# Keys of every event for every day from start_date to end_date (inclusive),
# ordered by day and then by event.
def daily_redis_keys(events:, start_date:, end_date:, context: '')
  (start_date.to_date..end_date.to_date).flat_map do |date|
    events.map { |event| redis_key(event, date, context) }
  end
end
2021-01-29 00:20:46 +05:30
# Distinct keys of every event for the days from start_date up to the end
# of the week preceding end_date's week, so the week containing end_date
# is left out.
def weekly_redis_keys(events:, start_date:, end_date:, context: '')
  last_day = end_date.end_of_week - 1.week

  (start_date.to_date..last_day.to_date).flat_map do |date|
    events.map { |event| redis_key(event, date, context) }
  end.uniq
end
end
end
end
end
2021-01-29 00:20:46 +05:30
2021-06-08 01:23:25 +05:30
# NOTE(review): prepend_mod_with is defined outside this file; presumably it prepends an
# edition-specific extension module with the given name when one exists — confirm.
Gitlab::UsageDataCounters::HLLRedisCounter.prepend_mod_with('Gitlab::UsageDataCounters::HLLRedisCounter')