debian-mirror-gitlab/lib/gitlab/database.rb
2023-07-09 08:55:56 +05:30

414 lines
17 KiB
Ruby
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# frozen_string_literal: true
module Gitlab
module Database
DATABASE_NAMES = %w[main ci main_clusterwide].freeze
MAIN_DATABASE_NAME = 'main'
CI_DATABASE_NAME = 'ci'
DEFAULT_POOL_HEADROOM = 10
# This constant is used when renaming tables concurrently.
# If you plan to rename a table using the `rename_table_safely` method, add your table here one milestone before the rename.
# Example:
# TABLES_TO_BE_RENAMED = {
# 'old_name' => 'new_name'
# }.freeze
TABLES_TO_BE_RENAMED = {}.freeze
# Minimum PostgreSQL version requirement per documentation:
# https://docs.gitlab.com/ee/install/requirements.html#postgresql-requirements
MINIMUM_POSTGRES_VERSION = 13
# https://www.postgresql.org/docs/9.2/static/datatype-numeric.html
MAX_INT_VALUE = 2147483647
MIN_INT_VALUE = -2147483648
# The max value between MySQL's TIMESTAMP and PostgreSQL's timestampz:
# https://www.postgresql.org/docs/9.1/static/datatype-datetime.html
# https://dev.mysql.com/doc/refman/5.7/en/datetime.html
# FIXME: this should just be the max value of timestampz
MAX_TIMESTAMP_VALUE = Time.at((1 << 31) - 1).freeze
# The maximum number of characters for text fields, to avoid DoS attacks via parsing huge text fields
# https://gitlab.com/gitlab-org/gitlab-foss/issues/61974
MAX_TEXT_SIZE_LIMIT = 1_000_000
# Migrations before this version may have been removed
MIN_SCHEMA_GITLAB_VERSION = '15.11'
# Schema we store dynamically managed partitions in (e.g. for time partitioning)
DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic
# Schema we store static partitions in (e.g. for hash partitioning)
STATIC_PARTITIONS_SCHEMA = :gitlab_partitions_static
# This is an extensive list of postgres schemas owned by GitLab
# It does not include the default public schema
EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze
PRIMARY_DATABASE_NAME = ActiveRecord::Base.connection_db_config.name.to_sym # rubocop:disable Database/MultipleDatabases
FULLY_QUALIFIED_IDENTIFIER = /^\w+\.\w+$/
## Database Modes
MODE_SINGLE_DATABASE = "single-database"
MODE_SINGLE_DATABASE_CI_CONNECTION = "single-database-ci-connection"
MODE_MULTIPLE_DATABASES = "multiple-databases"
def self.database_base_models
@database_base_models ||= {
# Note that we use ActiveRecord::Base here and not ApplicationRecord.
# This is deliberate, as we also use these classes to apply load
# balancing to, and the load balancer must be enabled for _all_ models
# that inherit from ActiveRecord::Base; not just our own models that
# inherit from ApplicationRecord.
main: ::ActiveRecord::Base,
main_clusterwide: ::MainClusterwide::ApplicationRecord.connection_class? ? ::MainClusterwide::ApplicationRecord : nil,
ci: ::Ci::ApplicationRecord.connection_class? ? ::Ci::ApplicationRecord : nil
}.compact.with_indifferent_access.freeze
end
# This returns a list of databases that contains all the gitlab_shared schema
# tables. We can't reuse database_base_models because Geo does not support
# the gitlab_shared tables yet.
def self.database_base_models_with_gitlab_shared
@database_base_models_with_gitlab_shared ||= {
# Note that we use ActiveRecord::Base here and not ApplicationRecord.
# This is deliberate, as we also use these classes to apply load
# balancing to, and the load balancer must be enabled for _all_ models
# that inher from ActiveRecord::Base; not just our own models that
# inherit from ApplicationRecord.
main: ::ActiveRecord::Base,
main_clusterwide: ::MainClusterwide::ApplicationRecord.connection_class? ? ::MainClusterwide::ApplicationRecord : nil,
ci: ::Ci::ApplicationRecord.connection_class? ? ::Ci::ApplicationRecord : nil
}.compact.with_indifferent_access.freeze
end
# This returns a list of databases whose connection supports database load
# balancing. We can't reuse the database_base_models method because the Geo
# database does not support load balancing yet.
#
# TODO: https://gitlab.com/gitlab-org/geo-team/discussions/-/issues/5032
def self.database_base_models_using_load_balancing
@database_base_models_using_load_balancing ||= {
# Note that we use ActiveRecord::Base here and not ApplicationRecord.
# This is deliberate, as we also use these classes to apply load
# balancing to, and the load balancer must be enabled for _all_ models
# that inher from ActiveRecord::Base; not just our own models that
# inherit from ApplicationRecord.
main: ::ActiveRecord::Base,
main_clusterwide: ::MainClusterwide::ApplicationRecord.connection_class? ? ::MainClusterwide::ApplicationRecord : nil,
ci: ::Ci::ApplicationRecord.connection_class? ? ::Ci::ApplicationRecord : nil
}.compact.with_indifferent_access.freeze
end
# This returns a list of base models with connection associated for a given gitlab_schema
def self.schemas_to_base_models
@schemas_to_base_models ||= {
gitlab_main: [self.database_base_models.fetch(:main)],
gitlab_ci: [self.database_base_models[:ci] || self.database_base_models.fetch(:main)], # use CI or fallback to main
gitlab_shared: database_base_models_with_gitlab_shared.values, # all models
gitlab_internal: database_base_models.values, # all models
gitlab_pm: [self.database_base_models.fetch(:main)], # package metadata models
gitlab_main_clusterwide: [self.database_base_models[:main_clusterwide] || self.database_base_models.fetch(:main)]
}.with_indifferent_access.freeze
end
def self.all_database_names
DATABASE_NAMES
end
# We configure the database connection pool size automatically based on the
# configured concurrency. We also add some headroom, to make sure we don't
# run out of connections when more threads besides the 'user-facing' ones
# are running.
#
# Read more about this in
# doc/development/database/client_side_connection_pool.md
def self.default_pool_size
headroom =
(ENV["DB_POOL_HEADROOM"].presence || DEFAULT_POOL_HEADROOM).to_i
Gitlab::Runtime.max_threads + headroom
end
# Database configured. Returns true even if the database is shared
def self.has_config?(database_name)
ActiveRecord::Base.configurations
.configs_for(env_name: Rails.env, name: database_name.to_s, include_replicas: true)
.present?
end
# Database configured. Returns false if the database is shared
def self.has_database?(database_name)
db_config = ::Gitlab::Database.database_base_models[database_name]&.connection_db_config
db_config.present? && db_config_share_with(db_config).nil?
end
def self.database_mode
if !has_config?(CI_DATABASE_NAME)
MODE_SINGLE_DATABASE
elsif has_database?(CI_DATABASE_NAME)
MODE_MULTIPLE_DATABASES
else
MODE_SINGLE_DATABASE_CI_CONNECTION
end
end
class PgUser < ApplicationRecord
self.table_name = 'pg_user'
self.primary_key = :usename
end
# rubocop: disable CodeReuse/ActiveRecord
def self.check_for_non_superuser
user = PgUser.find_by('usename = CURRENT_USER')
am_i_superuser = user.usesuper
Gitlab::AppLogger.info(
"Account details: User: \"#{user.usename}\", UseSuper: (#{am_i_superuser})"
)
raise 'Error: detected superuser' if am_i_superuser
rescue ActiveRecord::StatementInvalid
raise 'User CURRENT_USER not found'
end
# rubocop: enable CodeReuse/ActiveRecord
def self.check_postgres_version_and_print_warning
return if Gitlab::Runtime.rails_runner?
database_base_models.each do |name, model|
database = Gitlab::Database::Reflection.new(model)
next if database.postgresql_minimum_supported_version?
Kernel.warn ERB.new(Rainbow.new.wrap(<<~EOS).red).result
******************************************************************************
You are using PostgreSQL #{database.version} for the #{name} database, but this version of GitLab requires PostgreSQL >= <%= Gitlab::Database::MINIMUM_POSTGRES_VERSION %>.
<% if Rails.env.development? || Rails.env.test? %>
If using gitlab-development-kit, please find the relevant steps here:
https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/main/doc/howto/postgresql.md#upgrade-postgresql
<% end %>
Please upgrade your environment to a supported PostgreSQL version. See
https://docs.gitlab.com/ee/install/requirements.html#database for details.
******************************************************************************
EOS
rescue ActiveRecord::ActiveRecordError, PG::Error
# ignore - happens when Rake tasks yet have to create a database, e.g. for testing
end
end
def self.random
"RANDOM()"
end
def self.true_value
"'t'"
end
def self.false_value
"'f'"
end
def self.sanitize_timestamp(timestamp)
MAX_TIMESTAMP_VALUE > timestamp ? timestamp : MAX_TIMESTAMP_VALUE.dup
end
def self.all_uncached(&block)
# Calls to #uncached only disable caching for the current connection. Since the load balancer
# can potentially upgrade from read to read-write mode (using a different connection), we specify
# up-front that we'll explicitly use the primary for the duration of the operation.
Gitlab::Database::LoadBalancing::Session.current.use_primary do
base_models = database_base_models_using_load_balancing.values
base_models.reduce(block) { |blk, model| -> { model.uncached(&blk) } }.call
end
end
def self.allow_cross_joins_across_databases(url:)
# this method is implemented in:
# spec/support/database/prevent_cross_joins.rb
yield
end
def self.add_post_migrate_path_to_rails(force: false)
return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force
Rails.application.config.paths['db'].each do |db_path|
path = Rails.root.join(db_path, 'post_migrate').to_s
next if Rails.application.config.paths['db/migrate'].include? path
Rails.application.config.paths['db/migrate'] << path
# Rails memoizes migrations at certain points where it won't read the above
# path just yet. As such we must also update the following list of paths.
ActiveRecord::Migrator.migrations_paths << path
end
end
def self.db_config_names
::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name) - ['geo']
end
# This returns all matching schemas that a given connection can use
# Since the `ActiveRecord::Base` might change the connection (from main to ci)
# This does not look at literal connection names, but rather compares
# models that are holders for a given db_config_name
def self.gitlab_schemas_for_connection(connection)
db_config = self.db_config_for_connection(connection)
# connection might not be yet adopted (returning NullPool, and no connection_klass)
# in such cases it is fine to ignore such connections
return unless db_config
db_config_name = db_config.name.delete_suffix(LoadBalancing::LoadBalancer::REPLICA_SUFFIX)
primary_model = self.database_base_models.fetch(db_config_name.to_sym)
self.schemas_to_base_models.select do |_, child_models|
child_models.any? do |child_model|
child_model == primary_model || \
# The model might indicate a child connection, ensure that this is enclosed in a `db_config`
self.database_base_models[self.db_config_share_with(child_model.connection_db_config)] == primary_model
end
end.keys.map!(&:to_sym)
end
def self.db_config_for_connection(connection)
return unless connection
# For a ConnectionProxy we want to avoid ambiguous db_config as it may
# sometimes default to replica so we always return the primary config
# instead.
if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
return connection.load_balancer.configuration.db_config
end
# During application init we might receive `NullPool`
return unless connection.respond_to?(:pool) &&
connection.pool.respond_to?(:db_config)
connection.pool.db_config
end
# At the moment, the connection can only be retrieved by
# Gitlab::Database::LoadBalancer#read or #read_write or from the
# ActiveRecord directly. Therefore, if the load balancer doesn't
# recognize the connection, this method returns the primary role
# directly. In future, we may need to check for other sources.
# Expected returned names:
# main, main_replica, ci, ci_replica, unknown
def self.db_config_name(connection)
db_config = db_config_for_connection(connection)
db_config&.name || 'unknown'
end
# Currently the database configuration can only be shared with `main:`
# If the `database_tasks: false` is being used
# This is to be refined: https://gitlab.com/gitlab-org/gitlab/-/issues/356580
def self.db_config_share_with(db_config)
if db_config.database_tasks?
nil # no sharing
else
'main' # share with `main:`
end
end
def self.read_only?
false
end
def self.read_write?
!read_only?
end
# Determines minimum viable migration version, determined by the timestamp
# of the earliest migration file.
def self.read_minimum_migration_version
Dir.open(
Rails.root.join('db/migrate')
).filter_map { |f| /\A\d{14}/.match(f)&.to_s }.map(&:to_i).min
end
# Monkeypatch rails with upgraded database observability
def self.install_transaction_metrics_patches!
ActiveRecord::Base.prepend(ActiveRecordBaseTransactionMetrics)
end
def self.install_transaction_context_patches!
ActiveRecord::ConnectionAdapters::TransactionManager
.prepend(TransactionManagerContext)
ActiveRecord::ConnectionAdapters::RealTransaction
.prepend(RealTransactionContext)
end
# MonkeyPatch for ActiveRecord::Base for adding observability
module ActiveRecordBaseTransactionMetrics
extend ActiveSupport::Concern
class_methods do
# A patch over ApplicationRecord.transaction that provides
# observability into transactional methods.
def transaction(**options, &block)
transaction_type = get_transaction_type(connection.transaction_open?, options[:requires_new])
::Gitlab::Database::Metrics.subtransactions_increment(self.name) if transaction_type == :sub_transaction
payload = { connection: connection, transaction_type: transaction_type }
ActiveSupport::Notifications.instrument('transaction.active_record', payload) do
super(**options, &block)
end
end
private
def get_transaction_type(transaction_open, requires_new_flag)
if transaction_open
return :sub_transaction if requires_new_flag
return :fake_transaction
end
:real_transaction
end
end
end
# rubocop:disable Gitlab/ModuleWithInstanceVariables
module TransactionManagerContext
def transaction_context
@stack.first.try(:gitlab_transaction_context)
end
end
module RealTransactionContext
def gitlab_transaction_context
@gitlab_transaction_context ||= ::Gitlab::Database::Transaction::Context.new
end
def commit
gitlab_transaction_context.commit
super
end
def rollback
gitlab_transaction_context.rollback
super
end
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
end
end
Gitlab::Database.prepend_mod_with('Gitlab::Database')