2022-05-07 20:08:51 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
class Analytics::CycleAnalytics::Aggregation < ApplicationRecord
|
|
|
|
include FromUnion
|
2023-03-17 16:20:25 +05:30
|
|
|
include Analytics::CycleAnalytics::Parentable
|
2022-05-07 20:08:51 +05:30
|
|
|
|
2022-06-21 17:19:12 +05:30
|
|
|
validates :incremental_runtimes_in_seconds, :incremental_processed_records, :full_runtimes_in_seconds, :full_processed_records, presence: true, length: { maximum: 10 }, allow_blank: true
|
2022-05-07 20:08:51 +05:30
|
|
|
|
2023-03-04 22:38:38 +05:30
|
|
|
scope :priority_order, ->(column_to_sort = :last_incremental_run_at) { order(arel_table[column_to_sort].asc.nulls_first) }
|
2022-05-07 20:08:51 +05:30
|
|
|
scope :enabled, -> { where('enabled IS TRUE') }
|
|
|
|
|
2022-06-21 17:19:12 +05:30
|
|
|
def cursor_for(mode, model)
|
|
|
|
{
|
|
|
|
updated_at: self["last_#{mode}_#{model.table_name}_updated_at"],
|
|
|
|
id: self["last_#{mode}_#{model.table_name}_id"]
|
|
|
|
}.compact
|
|
|
|
end
|
|
|
|
|
2022-07-16 23:28:13 +05:30
|
|
|
def consistency_check_cursor_for(model)
|
|
|
|
{
|
|
|
|
:start_event_timestamp => self["last_consistency_check_#{model.issuable_model.table_name}_start_event_timestamp"],
|
|
|
|
:end_event_timestamp => self["last_consistency_check_#{model.issuable_model.table_name}_end_event_timestamp"],
|
|
|
|
model.issuable_id_column => self["last_consistency_check_#{model.issuable_model.table_name}_issuable_id"]
|
|
|
|
}.compact
|
|
|
|
end
|
|
|
|
|
2022-06-21 17:19:12 +05:30
|
|
|
def refresh_last_run(mode)
|
|
|
|
self["last_#{mode}_run_at"] = Time.current
|
|
|
|
end
|
|
|
|
|
|
|
|
def reset_full_run_cursors
|
|
|
|
self.last_full_issues_id = nil
|
|
|
|
self.last_full_issues_updated_at = nil
|
|
|
|
self.last_full_merge_requests_id = nil
|
|
|
|
self.last_full_merge_requests_updated_at = nil
|
|
|
|
end
|
|
|
|
|
|
|
|
def set_cursor(mode, model, cursor)
|
|
|
|
self["last_#{mode}_#{model.table_name}_id"] = cursor[:id]
|
|
|
|
self["last_#{mode}_#{model.table_name}_updated_at"] = cursor[:updated_at]
|
|
|
|
end
|
|
|
|
|
|
|
|
def set_stats(mode, runtime, processed_records)
|
|
|
|
# We only store the last 10 data points
|
|
|
|
self["#{mode}_runtimes_in_seconds"] = (self["#{mode}_runtimes_in_seconds"] + [runtime]).last(10)
|
|
|
|
self["#{mode}_processed_records"] = (self["#{mode}_processed_records"] + [processed_records]).last(10)
|
|
|
|
end
|
|
|
|
|
2022-05-07 20:08:51 +05:30
|
|
|
def estimated_next_run_at
|
|
|
|
return unless enabled
|
|
|
|
return if last_incremental_run_at.nil?
|
|
|
|
|
|
|
|
estimation = duration_until_the_next_aggregation_job +
|
|
|
|
average_aggregation_duration +
|
|
|
|
(last_incremental_run_at - earliest_last_run_at)
|
|
|
|
|
|
|
|
estimation < 1 ? nil : estimation.from_now
|
|
|
|
end
|
|
|
|
|
2023-03-17 16:20:25 +05:30
|
|
|
def self.safe_create_for_namespace(group_or_project_namespace)
|
|
|
|
# Namespaces::ProjectNamespace has no root_ancestor
|
|
|
|
# Related: https://gitlab.com/gitlab-org/gitlab/-/issues/386124
|
|
|
|
group = group_or_project_namespace.is_a?(Group) ? group_or_project_namespace : group_or_project_namespace.parent
|
2022-05-07 20:08:51 +05:30
|
|
|
top_level_group = group.root_ancestor
|
|
|
|
aggregation = find_by(group_id: top_level_group.id)
|
2023-04-23 21:23:45 +05:30
|
|
|
return aggregation if aggregation&.enabled?
|
2022-05-07 20:08:51 +05:30
|
|
|
|
2023-04-23 21:23:45 +05:30
|
|
|
# At this point we're sure that the group is licensed, we can always enable the aggregation.
|
|
|
|
# This re-enables the aggregation in case the group downgraded and later upgraded the license.
|
|
|
|
upsert({ group_id: top_level_group.id, enabled: true })
|
|
|
|
|
|
|
|
find(top_level_group.id)
|
2022-05-07 20:08:51 +05:30
|
|
|
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# The aggregation job is scheduled every 10 minutes: */10 * * * *
|
|
|
|
def duration_until_the_next_aggregation_job
|
|
|
|
(10 - (DateTime.current.minute % 10)).minutes.seconds
|
|
|
|
end
|
|
|
|
|
|
|
|
def average_aggregation_duration
|
|
|
|
return 0.seconds if incremental_runtimes_in_seconds.empty?
|
|
|
|
|
|
|
|
average = incremental_runtimes_in_seconds.sum.fdiv(incremental_runtimes_in_seconds.size)
|
|
|
|
average.seconds
|
|
|
|
end
|
|
|
|
|
|
|
|
def earliest_last_run_at
|
|
|
|
max = self.class.select(:last_incremental_run_at)
|
|
|
|
.where(enabled: true)
|
|
|
|
.where.not(last_incremental_run_at: nil)
|
|
|
|
.priority_order
|
|
|
|
.limit(1)
|
|
|
|
.to_sql
|
|
|
|
|
|
|
|
connection.select_value("(#{max})")
|
|
|
|
end
|
|
|
|
|
|
|
|
def self.load_batch(last_run_at, column_to_query = :last_incremental_run_at, batch_size = 100)
|
|
|
|
last_run_at_not_set = Analytics::CycleAnalytics::Aggregation
|
|
|
|
.enabled
|
|
|
|
.where(column_to_query => nil)
|
|
|
|
.priority_order(column_to_query)
|
|
|
|
.limit(batch_size)
|
|
|
|
|
|
|
|
last_run_at_before = Analytics::CycleAnalytics::Aggregation
|
|
|
|
.enabled
|
|
|
|
.where(arel_table[column_to_query].lt(last_run_at))
|
|
|
|
.priority_order(column_to_query)
|
|
|
|
.limit(batch_size)
|
|
|
|
|
|
|
|
Analytics::CycleAnalytics::Aggregation
|
|
|
|
.from_union([last_run_at_not_set, last_run_at_before], remove_order: false, remove_duplicates: false)
|
|
|
|
.limit(batch_size)
|
|
|
|
end
|
|
|
|
end
|