2018-11-18 11:00:15 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Postgresql
  # Read-only ActiveRecord model over PostgreSQL's `pg_replication_slots`
  # system view, used to inspect physical replication slot health and lag.
  class ReplicationSlot < Gitlab::Database::SharedModel
    self.table_name = 'pg_replication_slots'

    # Returns true if there are any replication slots in use.
    #
    # PostgreSQL-compatible databases such as Aurora don't support
    # replication slots, so this will return false as well.
    def self.in_use?
      transaction { exists? }
    rescue ActiveRecord::StatementInvalid
      # Querying pg_replication_slots raised — the view is unsupported on
      # this database, so report "not in use" rather than crashing.
      false
    end

    # Returns true if the lag observed across all replication slots exceeds a
    # given threshold.
    #
    # max - The maximum replication lag size, in bytes. Based on GitLab.com
    #       statistics it takes between 1 and 5 seconds to replicate around
    #       100 MB of data.
    def self.lag_too_great?(max = 100.megabytes)
      return false unless in_use?

      lag_function = "pg_wal_lsn_diff" \
        "(pg_current_wal_insert_lsn(), restart_lsn)::bigint"

      # We force the use of a transaction here so the query always goes to the
      # primary, even when using the DB load balancer.
      sizes = transaction { pluck(Arel.sql(lag_function)) }
      too_great = sizes.compact.count { |size| size >= max }

      # If too many replicas are falling behind too much, the availability of a
      # GitLab instance might suffer. To prevent this from happening we require
      # at least 1 replica to have data recent enough.
      if sizes.any? && too_great > 0
        (sizes.length - too_great) <= 1
      else
        false
      end
    end

    # Total number of replication slots, regardless of whether they are active.
    def self.count
      slots_count
    end

    # Number of replication slots without an attached consumer (active = 'f').
    def self.unused_slots_count
      slots_count(active: false)
    end

    # Number of replication slots with an attached consumer (active = 't').
    def self.used_slots_count
      slots_count(active: true)
    end

    # array of slots and the retained_bytes
    # https://www.skillslogic.com/blog/databases/checking-postgres-replication-lag
    # http://bdr-project.org/docs/stable/monitoring-peers.html
    def self.slots_retained_bytes
      connection.execute(<<-SQL.squish).to_a
        SELECT slot_name, database,
        active, pg_wal_lsn_diff(pg_current_wal_insert_lsn(), restart_lsn)
        AS retained_bytes
        FROM pg_replication_slots;
      SQL
    end

    # returns the max number WAL space (in bytes) being used across the replication slots
    def self.max_retained_wal
      connection.execute(<<-SQL.squish).first.fetch('coalesce').to_i
        SELECT COALESCE(MAX(pg_wal_lsn_diff(pg_current_wal_insert_lsn(), restart_lsn)), 0)
        FROM pg_replication_slots;
      SQL
    end

    # Value of the `max_replication_slots` server setting as an Integer.
    # `nil.to_i` is 0, so a missing row safely yields 0.
    def self.max_replication_slots
      connection.execute(<<-SQL.squish).first&.fetch('setting').to_i
        SELECT setting FROM pg_settings WHERE name = 'max_replication_slots';
      SQL
    end

    # Shared COUNT(*) over pg_replication_slots, optionally filtered on the
    # boolean `active` column. Returns an Integer.
    #
    # active - nil counts all slots; true/false restricts to used/unused slots.
    def self.slots_count(active: nil)
      sql = +"SELECT COUNT(*) FROM pg_replication_slots"
      sql << " WHERE active = '#{active ? 't' : 'f'}'" unless active.nil?

      connection
        .execute("#{sql};")
        .first
        .fetch('count')
        .to_i
    end
    private_class_method :slots_count
  end
end
|