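# Source: debian-mirror-gitlab/lib/gitlab/database/reindexing.rb (98 lines, 4 KiB, Ruby)
# Repository web-view residue ("Raw", "Normal View", "History") and the first
# blame timestamp (2021-01-03 14:25:43 +05:30) are preserved here as comments.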
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
# blame: 2021-02-22 17:27:13 +05:30
# Number of indexes to reindex per invocation
DEFAULT_INDEXES_PER_INVOCATION = 2

# Index types listed as supported. How this list is consumed is not visible
# in this file.
SUPPORTED_TYPES = %w(btree gist).freeze

# When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
# which only conflicts with DDL and vacuum. We therefore execute this with a rather
# high lock timeout and a long pause in between retries. This is an alternative to
# setting a high statement timeout, which would lead to a long running query with effects
# on e.g. vacuum.
#
# The value is 30 identical [1.minute, 9.minutes] pairs
# (presumably [lock timeout, pause before next retry] - confirm against the
# lock-retries helper that receives it).
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
# blame: 2022-01-26 12:08:38 +05:30
# Reports whether the :database_reindexing feature flag is switched on.
#
# Returns whatever Feature.enabled? returns for that flag when queried as an
# ops-type flag with default_enabled: :yaml.
def self.enabled?
  flag_options = { type: :ops, default_enabled: :yaml }

  Feature.enabled?(:database_reindexing, **flag_options)
end
# Entry point for a reindexing run.
#
# Iterates over every connection yielded by
# Gitlab::Database::EachDatabase.each_database_connection. When +database+ is
# given (truthy), connections whose name does not match it (compared as
# strings) are skipped. For every remaining connection this:
#   1. points SharedModel.logger at stdout if LOG_QUERIES_TO_CONSOLE is truthy,
#   2. creates pending async indexes if :database_async_index_creation is on,
#   3. runs automatic_reindexing.
#
# Any StandardError is logged through Gitlab::AppLogger and then re-raised.
def self.invoke(database = nil)
  Gitlab::Database::EachDatabase.each_database_connection do |_connection, name|
    wanted = database && database.to_s
    next if wanted && wanted != name.to_s

    if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
      Gitlab::Database::SharedModel.logger = Logger.new($stdout)
    end

    # Hack: Before we do actual reindexing work, create async indexes
    if Feature.enabled?(:database_async_index_creation, type: :ops)
      Gitlab::Database::AsyncIndexes.create_pending_indexes!
    end

    automatic_reindexing
  end
rescue StandardError => error
  Gitlab::AppLogger.error(error)
  raise
end
# blame: 2021-12-11 22:18:48 +05:30
# Runs one bounded round of automatic reindexing.
#
# Order of work:
#   1. Remove temporary indexes left behind by earlier (possibly aborted) runs.
#   2. Process entries from the explicit reindexing queue.
#   3. Spend whatever budget is left on indexes picked by the bloat heuristic.
#
# maximum_records - upper bound on indexes handled in this call, shared
#                   between the queue and the heuristic.
#
# Returns nil when the queue used up the whole budget, otherwise the return
# value of perform_with_heuristic.
def self.automatic_reindexing(maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
  # Leftovers are cleaned up first, before any new reindexing starts
  cleanup_leftovers!

  # The explicit queue takes priority over the heuristic
  processed = perform_from_queue(maximum_records: maximum_records)
  remaining = maximum_records - processed
  return unless remaining.positive?

  # Use the rest of the budget on the most bloated indexes
  perform_with_heuristic(maximum_records: remaining)
end
# Reindex based on bloat heuristic for a limited number of indexes per call
#
# We use a bloat heuristic to estimate the index bloat and pick the
# most bloated indexes for reindexing.
#
# candidate_indexes - collection handed to IndexSelection; defaults to
#                     Gitlab::Database::PostgresIndex.reindexing_support.
# maximum_records   - how many indexes to take from the selection.
#
# Each selected index is reindexed via Coordinator#perform. Errors are not
# rescued here, so a failing index aborts the remaining ones.
# Returns the collection of indexes taken from the selection.
def self.perform_with_heuristic(candidate_indexes = Gitlab::Database::PostgresIndex.reindexing_support, maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
  IndexSelection.new(candidate_indexes).take(maximum_records).each do |index|
    Coordinator.new(index).perform
  end
end
# blame: 2021-12-11 22:18:48 +05:30
# Reindexes indexes that were explicitly enqueued, up to a per-call limit.
#
# maximum_records - how many queue entries to fetch (in queue order).
#
# Every fetched entry is reindexed through Coordinator#perform and marked
# done!. If that raises a StandardError the entry is marked failed! and the
# error is logged; processing then continues with the next entry.
#
# Returns the number of entries fetched, counting failed ones as well.
def self.perform_from_queue(maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
  pending_entries = QueuedAction.in_queue_order.limit(maximum_records)

  handled = pending_entries.each do |entry|
    Coordinator.new(entry.index).perform
    entry.done!
  rescue StandardError => error
    entry.failed!
    Gitlab::AppLogger.error("Failed to perform reindexing action on queued entry #{entry}: #{error}")
  end

  handled.size
end
# blame: 2021-10-27 15:23:28 +05:30
# Drops the temporary indexes reported by PostgresIndex.reindexing_leftovers.
#
# For every leftover index this logs an info line and then issues
# DROP INDEX CONCURRENTLY IF EXISTS on the index's own connection, with schema
# and index name quoted by that connection. The statement runs through
# WithLockRetriesOutsideTransaction configured with REMOVE_INDEX_RETRY_CONFIG.
# raise_on_exhaustion: false is passed, presumably so that an index which
# still cannot be dropped after all retries does not abort the run - confirm
# against WithLockRetriesOutsideTransaction.
#
# Returns the collection of leftover indexes that was iterated.
def self.cleanup_leftovers!
  PostgresIndex.reindexing_leftovers.each do |index|
    Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")

    retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
      connection: index.connection,
      timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
      # Inside a singleton method `self.class` is merely `Module`, which does
      # not identify the caller; pass the module itself instead.
      # NOTE(review): klass is presumably only used to label log entries - confirm.
      klass: self,
      logger: Gitlab::AppLogger
    )

    retries.run(raise_on_exhaustion: false) do
      index.connection.tap do |conn|
        conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}")
      end
    end
  end
end
end
end
end