debian-mirror-gitlab/lib/gitlab/utils/usage_data.rb

337 lines
12 KiB
Ruby
Raw Normal View History

2020-06-23 00:09:42 +05:30
# frozen_string_literal: true
# Usage data utilities
#
# * distinct_count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
# Does a distinct batch count, smartly reduces batch_size and handles errors
#
# Examples:
# issues_using_zoom_quick_actions: distinct_count(ZoomMeeting, :issue_id),
#
# * count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil, start_at: Time.current)
# Does a non-distinct batch count, smartly reduces batch_size and handles errors
#
# Examples:
# active_user_count: count(User.active)
#
# * alt_usage_data method
# handles StandardError and falls back to -1 by default, so that one failing measure does not make all the others fail
# there might be cases where we need to set a specific fallback in order to be aligned with the type the version app is expecting
#
# Examples:
# alt_usage_data { Gitlab::VERSION }
# alt_usage_data { Gitlab::CurrentSettings.uuid }
# alt_usage_data(fallback: nil) { Gitlab.config.registry.enabled }
#
# * redis_usage_data method
2021-04-17 20:07:23 +05:30
# handles ::Redis::CommandError, Gitlab::UsageDataCounters::BaseCounter::UnknownEvent,
# Gitlab::UsageDataCounters::HLLRedisCounter::EventError
2020-06-23 00:09:42 +05:30
# returns -1 when a block is sent or hash with all values -1 when a counter is sent
# different behaviour due to 2 different implementations of redis counter
#
# Examples:
# redis_usage_data(Gitlab::UsageDataCounters::WikiPageCounter)
2022-07-16 23:28:13 +05:30
# redis_usage_data { Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'users_expanding_vulnerabilities', start_date: 28.days.ago, end_date: Date.current) }
2020-06-23 00:09:42 +05:30
module Gitlab
module Utils
module UsageData
2021-04-29 21:17:54 +05:30
include Gitlab::Utils::StrongMemoize
extend self

# Value returned by the numeric helpers in this module when the computation fails.
FALLBACK = -1
# Value returned by #histogram when its arguments are invalid or the query fails.
HISTOGRAM_FALLBACK = { '-1' => -1 }.freeze
# NOTE(review): not referenced in this file; presumably the fallback for
# distributed HLL counts - confirm against callers.
DISTRIBUTED_HLL_FALLBACK = -2
# Largest bucket_size accepted by #histogram.
MAX_BUCKET_SIZE = 100
2021-12-11 22:18:48 +05:30
2022-07-16 23:28:13 +05:30
# Runs the given block and returns its result unchanged.
#
# Most helpers in this module wrap their work in this method, which makes it
# the single place through which that work is invoked.
#
# @yield the work to run
# @return the value of the block
def with_duration
  yield
end
2021-12-11 22:18:48 +05:30
# Instantiates the named instrumentation class and returns its value.
#
# @param metric [String, Symbol] class name, resolved under
#   Gitlab::Usage::Metrics::Instrumentations
# @param time_frame [String] passed to the instrumentation constructor
# @param options [Hash] passed to the instrumentation constructor
# @return whatever the instrumentation's #value returns
def add_metric(metric, time_frame: 'none', options: {})
  metric_class = "Gitlab::Usage::Metrics::Instrumentations::#{metric}".constantize

  metric_class.new(time_frame: time_frame, options: options).value
end
2022-07-16 23:28:13 +05:30
# Counts the rows of a relation, in batches by default.
#
# @param relation the relation to count
# @param column optional column forwarded to the batch counter
# @param batch [Boolean] when true, delegate to Gitlab::Database::BatchCount.batch_count;
#   otherwise call relation.count directly
# @param batch_size forwarded to the batch counter
# @param start forwarded to the batch counter
# @param finish forwarded to the batch counter
# @param start_at accepted for interface compatibility; not used by this method body
# @return the count, or FALLBACK when ActiveRecord::StatementInvalid is raised
def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil, start_at: Time.current)
  with_duration do
    if batch
      Gitlab::Database::BatchCount.batch_count(relation, column, batch_size: batch_size, start: start, finish: finish)
    else
      relation.count
    end
  rescue ActiveRecord::StatementInvalid => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    FALLBACK
  end
end
# Counts the distinct values of a relation, in batches by default.
#
# @param relation the relation to count
# @param column column whose distinct values are counted
# @param batch [Boolean] when true, delegate to
#   Gitlab::Database::BatchCount.batch_distinct_count; otherwise call
#   relation.distinct_count_by(column)
# @param batch_size forwarded to the batch counter
# @param start forwarded to the batch counter
# @param finish forwarded to the batch counter
# @return the distinct count, or FALLBACK when ActiveRecord::StatementInvalid is raised
def distinct_count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
  with_duration do
    if batch
      Gitlab::Database::BatchCount.batch_distinct_count(relation, column, batch_size: batch_size, start: start, finish: finish)
    else
      relation.distinct_count_by(column)
    end
  rescue ActiveRecord::StatementInvalid => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    FALLBACK
  end
end
2021-02-22 17:27:13 +05:30
# Estimates a distinct count via Gitlab::Database::PostgresHll::BatchDistinctCounter.
#
# @param relation the relation handed to the counter
# @param column column handed to the counter
# @param batch_size forwarded to the counter's #execute
# @param start forwarded to the counter's #execute
# @param finish forwarded to the counter's #execute
# @yield [buckets] optionally receives the object returned by #execute before
#   the estimate is read from it
# @return the buckets' estimated_distinct_count, or FALLBACK when
#   ActiveRecord::StatementInvalid is raised
def estimate_batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
  with_duration do
    buckets = Gitlab::Database::PostgresHll::BatchDistinctCounter
      .new(relation, column)
      .execute(batch_size: batch_size, start: start, finish: finish)

    yield buckets if block_given?

    buckets.estimated_distinct_count
  rescue ActiveRecord::StatementInvalid => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    FALLBACK
  end
end
2020-11-24 15:15:51 +05:30
# Sums a column through Gitlab::Database::BatchCount.batch_sum.
#
# @param relation the relation handed to the batch counter
# @param column the column to sum
# @param batch_size forwarded to the batch counter
# @param start forwarded to the batch counter
# @param finish forwarded to the batch counter
# @return the batch sum, or FALLBACK when ActiveRecord::StatementInvalid is raised
def sum(relation, column, batch_size: nil, start: nil, finish: nil)
  with_duration do
    Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish)
  rescue ActiveRecord::StatementInvalid => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    FALLBACK
  end
end
2022-07-23 23:45:48 +05:30
# Averages a column through Gitlab::Database::BatchCount.batch_average.
#
# @param relation the relation handed to the batch counter
# @param column the column to average
# @param batch_size forwarded to the batch counter
# @param start forwarded to the batch counter
# @param finish forwarded to the batch counter
# @return the batch average, or FALLBACK when ActiveRecord::StatementInvalid is raised
def average(relation, column, batch_size: nil, start: nil, finish: nil)
  with_duration do
    range_options = { batch_size: batch_size, start: start, finish: finish }

    Gitlab::Database::BatchCount.batch_average(relation, column, **range_options)
  rescue ActiveRecord::StatementInvalid => e
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
    FALLBACK
  end
end
2021-04-17 20:07:23 +05:30
# We don't support batching with histograms.
# Please avoid using this method on large tables.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/323949.
#
# rubocop: disable CodeReuse/ActiveRecord
# Builds a histogram of per-group row counts using the SQL WIDTH_BUCKET function.
#
# @param relation the relation that is grouped by +column+
# @param column the column to group by
# @param buckets an object responding to #first, #last and #size that gives the
#   lower and upper bucket bounds
# @param bucket_size [Integer] number of buckets; must be non-zero and at most MAX_BUCKET_SIZE
# @return [Hash] query rows converted to a Hash with string keys, or
#   HISTOGRAM_FALLBACK when the arguments are invalid or
#   ActiveRecord::StatementInvalid is raised
def histogram(relation, column, buckets:, bucket_size: buckets.size)
  with_duration do
    # Using lambda to avoid exposing histogram specific methods
    parameters_valid = lambda do
      error_message =
        if buckets.first == buckets.last
          'Lower bucket bound cannot equal to upper bucket bound'
        elsif bucket_size == 0
          'Bucket size cannot be zero'
        elsif bucket_size > MAX_BUCKET_SIZE
          "Bucket size #{bucket_size} exceeds the limit of #{MAX_BUCKET_SIZE}"
        end

      # `break` inside a lambda returns from the lambda only.
      break true unless error_message

      exception = ArgumentError.new(error_message)
      exception.set_backtrace(caller)
      Gitlab::ErrorTracking.track_and_raise_for_dev_exception(exception)
      false
    end

    break HISTOGRAM_FALLBACK unless parameters_valid.call

    count_grouped = relation.group(column).select(Arel.star.count.as('count_grouped'))
    cte = Gitlab::SQL::CTE.new(:count_cte, count_grouped)

    # For example, 9 segments gives 10 buckets
    bucket_segments = bucket_size - 1

    width_bucket = Arel::Nodes::NamedFunction
      .new('WIDTH_BUCKET', [cte.table[:count_grouped], buckets.first, buckets.last, bucket_segments])
      .as('buckets')

    query = cte
      .table
      .project(width_bucket, cte.table[:count])
      .group('buckets')
      .order('buckets')
      .with(cte.to_arel)

    # Return the histogram as a Hash because buckets are unique.
    relation
      .connection
      .exec_query(query.to_sql)
      .rows
      .to_h
      # Keys are converted to strings in Usage Ping JSON
      .stringify_keys
  rescue ActiveRecord::StatementInvalid => e
    Gitlab::AppJsonLogger.error(
      event: 'histogram',
      relation: relation.table_name,
      operation: 'histogram',
      operation_args: [column, buckets.first, buckets.last, bucket_segments],
      # query is still nil when the failure happened before it was assembled;
      # safe navigation keeps the handler itself from raising NoMethodError.
      query: query&.to_sql,
      message: e.message
    )
    # Raises error for dev env
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
    HISTOGRAM_FALLBACK
  end
end
# rubocop: enable CodeReuse/ActiveRecord
# Adds up the given values, treating any negative value as a failed measurement.
#
# @param args [Array<Numeric>] values to add
# @return the sum of +args+; FALLBACK when any argument is negative or when a
#   StandardError is raised while checking or summing (for example a nil argument)
def add(*args)
  with_duration do
    # A negative argument is an upstream fallback value, so the total is not meaningful.
    break FALLBACK if args.any?(&:negative?)

    args.sum
  rescue StandardError => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    FALLBACK
  end
end
2020-06-23 00:09:42 +05:30
# Returns a value, or the result of a block, with a fallback on failure.
#
# @param value returned as-is when no block is given
# @param fallback returned when a StandardError is raised; defaults to FALLBACK
# @yield optional computation whose result is returned instead of +value+
# @return the block result, +value+, or +fallback+
def alt_usage_data(value = nil, fallback: FALLBACK, &block)
  with_duration do
    if block
      yield
    else
      value
    end
  rescue StandardError => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    fallback
  end
end
# Reads usage data from Redis, either from a block or from a counter object.
#
# A block takes precedence over +counter+. When neither a block nor a present
# counter is given, the result is nil.
#
# @param counter optional counter object handed to redis_usage_data_totals
# @yield optional computation run through redis_usage_counter
# @return the result of redis_usage_counter or redis_usage_data_totals, or nil
def redis_usage_data(counter = nil, &block)
  with_duration do
    if block
      redis_usage_counter(&block)
    elsif counter.present?
      redis_usage_data_totals(counter)
    end
  end
end
2021-04-17 20:07:23 +05:30
# Yields a Prometheus client to the block, falling back when none is usable.
#
# @param fallback returned when no client is available or when a StandardError
#   is raised while obtaining the client or running the block
# @param verify forwarded to prometheus_client
# @yield [client] the client returned by prometheus_client
# @return the block result, or +fallback+
def with_prometheus_client(fallback: {}, verify: true)
  with_duration do
    client = prometheus_client(verify: verify)
    break fallback unless client

    yield client
  rescue StandardError
    fallback
  end
end
# Runs the block and reports how long it took.
#
# @yield the work to time
# @return [Array] a two-element array: the block's result and the elapsed time
#   as returned by Benchmark.realtime
def measure_duration
  outcome = nil
  elapsed = Benchmark.realtime { outcome = yield }

  [outcome, elapsed]
end
2020-07-28 23:09:34 +05:30
# Runs the block and stamps its result with the current time.
#
# @param key the key under which Time.current is stored
# @yield must return an object responding to #merge (for example a Hash)
# @return the block result merged with { key => Time.current }
def with_finished_at(key)
  yield.merge(key => Time.current)
end
2020-11-24 15:15:51 +05:30
# Records an event through Gitlab::UsageDataCounters::HLLRedisCounter.track_event.
#
# @param event_name [String] the event name; converted with #to_s before tracking
# @param values [Array|String] the values counted
def track_usage_event(event_name, values)
  Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end
2021-06-08 01:23:25 +05:30
# Returns the memoized maximum of a column for a model.
#
# The memoization key is derived from the model name only, so values are kept
# per column inside the memoized Hash. This way a lookup for one column never
# returns the value computed for another column of the same model.
#
# @param model a model class responding to #name and #maximum
# @param column the column to read; defaults to :id
# @return the value of model.maximum(column), computed once per model and column
def maximum_id(model, column = nil)
  key = :"#{model.name.downcase.gsub('::', '_')}_maximum_id"
  column_to_read = column || :id

  per_column = strong_memoize(key) { {} }
  # fetch (rather than ||=) so that a nil maximum is memoized as well.
  per_column.fetch(column_to_read) { per_column[column_to_read] = model.maximum(column_to_read) }
end
2021-06-08 01:23:25 +05:30
# rubocop: disable UsageData/LargeTable:
2021-09-30 23:02:18 +05:30
# Counts active Jira integrations, split into cloud and server installations.
#
# An integration counts as cloud when its URL contains '.atlassian.net'; every
# other integration, including one without a URL, counts as server.
#
# @return [Hash] with the keys :projects_jira_server_active and :projects_jira_cloud_active
def jira_integration_data
  with_duration do
    data = {
      projects_jira_server_active: 0,
      projects_jira_cloud_active: 0
    }

    # rubocop: disable CodeReuse/ActiveRecord
    ::Integrations::Jira.active.includes(:jira_tracker_data).find_in_batches(batch_size: 100) do |services|
      cloud, server = services.partition do |service|
        # TODO: Simplify as part of https://gitlab.com/gitlab-org/gitlab/issues/29404
        service_url = service.data_fields&.url || (service.properties && service.properties['url'])
        service_url&.include?('.atlassian.net')
      end

      data[:projects_jira_server_active] += server.size
      data[:projects_jira_cloud_active] += cloud.size
    end

    data
  end
end
2022-07-16 23:28:13 +05:30
2021-06-08 01:23:25 +05:30
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: enable UsageData/LargeTable:
# Returns the memoized minimum of a column for a model.
#
# The memoization key is derived from the model name only, so values are kept
# per column inside the memoized Hash. This way a lookup for one column never
# returns the value computed for another column of the same model.
#
# @param model a model class responding to #name and #minimum
# @param column the column to read; defaults to :id
# @return the value of model.minimum(column), computed once per model and column
def minimum_id(model, column = nil)
  key = :"#{model.name.downcase.gsub('::', '_')}_minimum_id"
  column_to_read = column || :id

  per_column = strong_memoize(key) { {} }
  # fetch (rather than ||=) so that a nil minimum is memoized as well.
  per_column.fetch(column_to_read) { per_column[column_to_read] = model.minimum(column_to_read) }
end
2020-06-23 00:09:42 +05:30
private
2020-11-24 15:15:51 +05:30
# Builds a Prometheus client for the discovered server address.
#
# @param verify forwarded to Gitlab::PrometheusClient.new
# @return the first client that reports ready?, or nil when there is no server
#   address or no scheme yields a ready client
def prometheus_client(verify:)
  server_address = prometheus_server_address

  return unless server_address

  # There really is not a way to discover whether a Prometheus connection is using TLS or not
  # Try TLS first because HTTPS will return fast if failed.
  %w[https http].each do |scheme|
    api_url = "#{scheme}://#{server_address}"
    client = Gitlab::PrometheusClient.new(api_url, allow_local_requests: true, verify: verify)
    return client if client.ready?
  rescue StandardError
    # A failure with one scheme must not prevent trying the next one.
    next
  end

  nil
end
# Determines the address of the Prometheus server, without protocol.
#
# @return [String, nil] the internal Prometheus URI with surrounding whitespace
#   and a leading http:// or https:// removed when internal Prometheus is
#   enabled; otherwise the address discovered through Consul when a Consul API
#   URL is set; nil when neither applies
def prometheus_server_address
  if Gitlab::Prometheus::Internal.prometheus_enabled?
    # Stripping protocol from URI. \A anchors at the start of the string, whereas
    # ^ would also match after an embedded newline.
    Gitlab::Prometheus::Internal.uri&.strip&.sub(%r{\Ahttps?://}, '')
  elsif Gitlab::Consul::Internal.api_url
    Gitlab::Consul::Internal.discover_prometheus_server_address
  end
end
2020-06-23 00:09:42 +05:30
# Runs a Redis-backed computation, returning FALLBACK on known counter errors.
#
# Only ::Redis::CommandError, BaseCounter::UnknownEvent and
# HLLRedisCounter::EventError are rescued; any other error propagates.
#
# @yield the computation to run
# @return the block result, or FALLBACK
def redis_usage_counter
  yield
rescue ::Redis::CommandError, Gitlab::UsageDataCounters::BaseCounter::UnknownEvent, Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error
  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
  FALLBACK
end
# Reads the totals of a counter, using the counter's own fallback on known errors.
#
# Only ::Redis::CommandError and BaseCounter::UnknownEvent are rescued; any
# other error propagates.
#
# @param counter an object responding to #totals and #fallback_totals
# @return counter.totals, or counter.fallback_totals when a rescued error is raised
def redis_usage_data_totals(counter)
  counter.totals
rescue ::Redis::CommandError, Gitlab::UsageDataCounters::BaseCounter::UnknownEvent => error
  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
  counter.fallback_totals
end
end
end
end