debian-mirror-gitlab/lib/gitlab/database/reindexing/index_selection.rb
2023-03-17 16:20:25 +05:30

58 lines
1.8 KiB
Ruby

# frozen_string_literal: true
module Gitlab
  module Database
    module Reindexing
      # Enumerable view over the index candidates that are worth reindexing.
      #
      # Candidates are narrowed down through the `not_recently_reindexed`
      # scope and a minimum on-disk size, ordered from the highest to the
      # lowest relative bloat level, and finally cut off at
      # MINIMUM_RELATIVE_BLOAT.
      class IndexSelection
        include Enumerable

        # An index is only selected when its relative bloat level
        # (bloat estimate / size) is at or above this value.
        MINIMUM_RELATIVE_BLOAT = 0.2

        # Indexes smaller than this on disk (before reindexing) are ignored.
        INDEX_SIZE_MINIMUM = 1.gigabyte

        # Tables whose indexes are skipped whenever
        # `too_late_for_very_large_table?` is true.
        VERY_LARGE_TABLES = %i[ci_builds].freeze

        # Enumerable is driven by #each, which is forwarded to the
        # memoized selection built in #indexes.
        delegate :each, to: :indexes

        # candidates - relation of index candidates; it must respond to
        #              `not_recently_reindexed` and `where`.
        def initialize(candidates)
          @candidates = candidates
        end

        private

        attr_reader :candidates

        # Returns the selected indexes, most bloated first. The result is
        # memoized for the lifetime of this object.
        #
        # The N+1 query here is deliberate. Bloat estimates are exposed
        # through a view covering every index, but computing the estimate
        # for all indexes in one go is expensive, so the estimate is
        # requested one index at a time instead.
        def indexes
          @indexes ||= begin
            most_bloated_first = relations_that_need_cleaning_before_deadline
              .sort_by(&:relative_bloat_level) # forced N+1
              .reverse

            most_bloated_first.select do |index|
              index.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT
            end
          end
        end

        # Candidates passing the `not_recently_reindexed` scope whose on-disk
        # size reaches INDEX_SIZE_MINIMUM. Indexes on VERY_LARGE_TABLES are
        # excluded when it is too late to start working on them.
        def relations_that_need_cleaning_before_deadline
          eligible = candidates.not_recently_reindexed.where('ondisk_size_bytes >= ?', INDEX_SIZE_MINIMUM)
          return eligible unless too_late_for_very_large_table?

          eligible.where.not(tablename: VERY_LARGE_TABLES)
        end

        # Reindexing runs over the weekend. Kicking off a very large table
        # late on Sunday could keep the operation running into Monday, which
        # is unwanted because it prevents vacuum from running. Hence any day
        # other than Saturday counts as too late.
        def too_late_for_very_large_table?
          !Date.today.saturday?
        end
      end
    end
  end
end