2018-12-13 13:39:08 +05:30
# frozen_string_literal: true
2015-10-24 18:46:33 +05:30
module Gitlab
module Database
2023-04-23 21:23:45 +05:30
# Names of the databases this module knows about.
DATABASE_NAMES = %w[main ci main_clusterwide].freeze

MAIN_DATABASE_NAME = 'main'
CI_DATABASE_NAME = 'ci'

# Number of connections added on top of the thread count when sizing the
# connection pool (see default_pool_size).
DEFAULT_POOL_HEADROOM = 10

# This constant is used when renaming tables concurrently.
# If you plan to rename a table using the `rename_table_safely` method,
# add your table here one milestone before the rename.
# Example:
#   TABLES_TO_BE_RENAMED = {
#     'old_name' => 'new_name'
#   }.freeze
TABLES_TO_BE_RENAMED = {}.freeze

# Minimum PostgreSQL version requirement per documentation:
# https://docs.gitlab.com/ee/install/requirements.html#postgresql-requirements
MINIMUM_POSTGRES_VERSION = 13

# Bounds of a PostgreSQL 4-byte signed integer column.
# https://www.postgresql.org/docs/9.2/static/datatype-numeric.html
MAX_INT_VALUE = 2_147_483_647
MIN_INT_VALUE = -2_147_483_648

# The max value between MySQL's TIMESTAMP and PostgreSQL's timestamptz:
# https://www.postgresql.org/docs/9.1/static/datatype-datetime.html
# https://dev.mysql.com/doc/refman/5.7/en/datetime.html
# FIXME: this should just be the max value of timestamptz
MAX_TIMESTAMP_VALUE = Time.at((2**31) - 1).freeze

# The maximum number of characters for text fields, to avoid DoS attacks via parsing huge text fields
# https://gitlab.com/gitlab-org/gitlab-foss/issues/61974
MAX_TEXT_SIZE_LIMIT = 1_000_000

# Migrations before this version may have been removed
MIN_SCHEMA_GITLAB_VERSION = '15.11'

# Schema we store dynamically managed partitions in (e.g. for time partitioning)
DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic

# Schema we store static partitions in (e.g. for hash partitioning)
STATIC_PARTITIONS_SCHEMA = :gitlab_partitions_static

# The postgres schemas owned by GitLab.
# It does not include the default public schema.
EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze
2022-07-16 23:28:13 +05:30
# Name, as a Symbol, of the database configuration ActiveRecord::Base is
# connected with at the time this file is loaded.
PRIMARY_DATABASE_NAME =
  ActiveRecord::Base.connection_db_config.name.to_sym # rubocop:disable Database/MultipleDatabases
2021-06-08 01:23:25 +05:30
2023-04-23 21:23:45 +05:30
# Matches a two-part identifier of the form `schema.name`, where both parts
# consist of word characters only.
# Anchored with \A and \z so the entire string has to match: ^ and $ anchor
# on line boundaries and would accept a multi-line string as long as any one
# of its lines looked like an identifier.
FULLY_QUALIFIED_IDENTIFIER = /\A\w+\.\w+\z/
2023-06-20 00:43:36 +05:30
## Database Modes
# Values returned by database_mode. They carry no surrounding whitespace so
# they can be compared and printed verbatim.
MODE_SINGLE_DATABASE = "single-database"
MODE_SINGLE_DATABASE_CI_CONNECTION = "single-database-ci-connection"
MODE_MULTIPLE_DATABASES = "multiple-databases"
2021-11-18 22:05:49 +05:30
# Maps each database name to the base model class holding its connection.
# :main is always present; :main_clusterwide and :ci are included only when
# their ApplicationRecord answers true to `connection_class?`.
# The result is a frozen, indifferent-access hash, memoized after first use.
#
# Note that we use ActiveRecord::Base here and not ApplicationRecord.
# This is deliberate, as we also use these classes to apply load
# balancing to, and the load balancer must be enabled for _all_ models
# that inherit from ActiveRecord::Base; not just our own models that
# inherit from ApplicationRecord.
def self.database_base_models
  return @database_base_models if @database_base_models

  models = { main: ::ActiveRecord::Base }

  if ::MainClusterwide::ApplicationRecord.connection_class?
    models[:main_clusterwide] = ::MainClusterwide::ApplicationRecord
  end

  if ::Ci::ApplicationRecord.connection_class?
    models[:ci] = ::Ci::ApplicationRecord
  end

  @database_base_models = models.with_indifferent_access.freeze
end
2022-08-13 15:12:31 +05:30
# This returns a list of databases that contains all the gitlab_shared schema
# tables. We can't reuse database_base_models because Geo does not support
# the gitlab_shared tables yet.
#
# As in database_base_models, ActiveRecord::Base (and not ApplicationRecord)
# is used for :main on purpose: these classes are also what load balancing
# is applied to, and the load balancer must be enabled for _all_ models that
# inherit from ActiveRecord::Base, not just those inheriting from
# ApplicationRecord.
#
# Returns a frozen, indifferent-access hash, memoized after first use.
def self.database_base_models_with_gitlab_shared
  return @database_base_models_with_gitlab_shared if @database_base_models_with_gitlab_shared

  models = { main: ::ActiveRecord::Base }

  if ::MainClusterwide::ApplicationRecord.connection_class?
    models[:main_clusterwide] = ::MainClusterwide::ApplicationRecord
  end

  if ::Ci::ApplicationRecord.connection_class?
    models[:ci] = ::Ci::ApplicationRecord
  end

  @database_base_models_with_gitlab_shared = models.with_indifferent_access.freeze
end
# This returns a list of databases whose connection supports database load
# balancing. We can't reuse the database_base_models method because the Geo
# database does not support load balancing yet.
#
# TODO: https://gitlab.com/gitlab-org/geo-team/discussions/-/issues/5032
#
# ActiveRecord::Base (and not ApplicationRecord) is used for :main on
# purpose: the load balancer must be enabled for _all_ models that inherit
# from ActiveRecord::Base, not just those inheriting from ApplicationRecord.
#
# Returns a frozen, indifferent-access hash, memoized after first use.
def self.database_base_models_using_load_balancing
  return @database_base_models_using_load_balancing if @database_base_models_using_load_balancing

  models = { main: ::ActiveRecord::Base }

  if ::MainClusterwide::ApplicationRecord.connection_class?
    models[:main_clusterwide] = ::MainClusterwide::ApplicationRecord
  end

  if ::Ci::ApplicationRecord.connection_class?
    models[:ci] = ::Ci::ApplicationRecord
  end

  @database_base_models_using_load_balancing = models.with_indifferent_access.freeze
end
2022-01-26 12:08:38 +05:30
# This returns a list of base models with connection associated for a given gitlab_schema.
# Schemas that have no dedicated base model fall back to the :main one.
# The result is a frozen, indifferent-access hash, memoized after first use.
def self.schemas_to_base_models
  return @schemas_to_base_models if @schemas_to_base_models

  base_models = database_base_models
  main_model = base_models.fetch(:main)

  mapping = {
    gitlab_main: [main_model],
    gitlab_ci: [base_models[:ci] || main_model], # use CI or fallback to main
    gitlab_shared: database_base_models_with_gitlab_shared.values, # all models
    gitlab_internal: base_models.values, # all models
    gitlab_pm: [main_model], # package metadata models
    gitlab_main_clusterwide: [base_models[:main_clusterwide] || main_model]
  }

  @schemas_to_base_models = mapping.with_indifferent_access.freeze
end
# Returns the DATABASE_NAMES constant unchanged.
def self.all_database_names
  DATABASE_NAMES
end
2021-11-11 11:23:49 +05:30
# We configure the database connection pool size automatically based on the
# configured concurrency. We also add some headroom, to make sure we don't
# run out of connections when more threads besides the 'user-facing' ones
# are running.
#
# The headroom is read from the DB_POOL_HEADROOM environment variable; when
# that is unset or blank, DEFAULT_POOL_HEADROOM is used instead.
#
# Read more about this in
# doc/development/database/client_side_connection_pool.md
#
# Returns Gitlab::Runtime.max_threads plus the headroom.
def self.default_pool_size
  # The key must be the exact variable name: a key padded with spaces never
  # matches a variable set as DB_POOL_HEADROOM, so the override would be ignored.
  configured_headroom = ENV['DB_POOL_HEADROOM'].presence
  headroom = (configured_headroom || DEFAULT_POOL_HEADROOM).to_i

  Gitlab::Runtime.max_threads + headroom
end
2023-06-20 00:43:36 +05:30
# Database configured. Returns true even if the database is shared.
#
# Looks the name up (replicas included) among the ActiveRecord
# configurations of the current Rails environment.
def self.has_config?(database_name)
  matching_configs = ActiveRecord::Base.configurations.configs_for(
    env_name: Rails.env,
    name: database_name.to_s,
    include_replicas: true
  )

  matching_configs.present?
end
2023-06-20 00:43:36 +05:30
# Database configured. Returns false if the database is shared.
#
# True only when a base model is registered under database_name, it has a
# db config, and db_config_share_with reports no sharing for that config.
def self.has_database?(database_name)
  base_model = ::Gitlab::Database.database_base_models[database_name]
  db_config = base_model&.connection_db_config

  db_config.present? && db_config_share_with(db_config).nil?
end
# Returns one of the MODE_* constants:
# - MODE_SINGLE_DATABASE when there is no config for the CI database,
# - MODE_MULTIPLE_DATABASES when the CI database is configured and not shared,
# - MODE_SINGLE_DATABASE_CI_CONNECTION otherwise.
def self.database_mode
  return MODE_SINGLE_DATABASE unless has_config?(CI_DATABASE_NAME)
  return MODE_MULTIPLE_DATABASES if has_database?(CI_DATABASE_NAME)

  MODE_SINGLE_DATABASE_CI_CONNECTION
end
2022-04-04 11:22:00 +05:30
# Model over the `pg_user` relation, keyed by the `usename` column.
class PgUser < ApplicationRecord
  self.table_name = 'pg_user'
  self.primary_key = :usename
end
# rubocop: disable CodeReuse/ActiveRecord
# Logs the current database user together with its superuser flag, and
# raises when that user is a superuser.
#
# Raises RuntimeError 'Error: detected superuser' for a superuser account,
# and RuntimeError 'User CURRENT_USER not found' when the lookup returns no
# row or the query is rejected (ActiveRecord::StatementInvalid).
def self.check_for_non_superuser
  user = PgUser.find_by('usename = CURRENT_USER')

  # find_by returns nil when nothing matches; without this guard the next
  # line would fail with NoMethodError instead of the intended message.
  raise 'User CURRENT_USER not found' unless user

  am_i_superuser = user.usesuper

  Gitlab::AppLogger.info(
    "Account details: User: \"#{user.usename}\", UseSuper: (#{am_i_superuser})"
  )

  raise 'Error: detected superuser' if am_i_superuser
rescue ActiveRecord::StatementInvalid
  raise 'User CURRENT_USER not found'
end
# rubocop: enable CodeReuse/ActiveRecord
2020-07-28 23:09:34 +05:30
# Prints a red warning banner through Kernel.warn for every entry of
# database_base_models whose PostgreSQL version is below the supported
# minimum. Does nothing when running under the Rails runner.
#
# The message is a heredoc rendered through ERB: `#{...}` is interpolated by
# Ruby first, then the `<% %>` tags are evaluated by ERB. The tags and URLs
# must therefore be written without embedded whitespace, otherwise ERB
# prints the tags literally and the links are unusable.
def self.check_postgres_version_and_print_warning
  return if Gitlab::Runtime.rails_runner?

  database_base_models.each do |name, model|
    database = Gitlab::Database::Reflection.new(model)

    next if database.postgresql_minimum_supported_version?

    Kernel.warn ERB.new(Rainbow.new.wrap(<<~EOS).red).result

                ██     ██  █████  ██████  ███    ██ ██ ███    ██  ██████
                ██     ██ ██   ██ ██   ██ ████   ██ ██ ████   ██ ██
                ██  █  ██ ███████ ██████  ██ ██  ██ ██ ██ ██  ██ ██   ███
                ██ ███ ██ ██   ██ ██   ██ ██  ██ ██ ██ ██  ██ ██ ██    ██
                 ███ ███  ██   ██ ██   ██ ██   ████ ██ ██   ████  ██████

      ******************************************************************************
        You are using PostgreSQL #{database.version} for the #{name} database, but this version of GitLab requires PostgreSQL >= <%= Gitlab::Database::MINIMUM_POSTGRES_VERSION %>.
        <% if Rails.env.development? || Rails.env.test? %>
        If using gitlab-development-kit, please find the relevant steps here:
          https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/main/doc/howto/postgresql.md#upgrade-postgresql
        <% end %>
        Please upgrade your environment to a supported PostgreSQL version. See
        https://docs.gitlab.com/ee/install/requirements.html#database for details.
      ******************************************************************************
    EOS
  rescue ActiveRecord::ActiveRecordError, PG::Error
    # ignore - happens when Rake tasks yet have to create a database, e.g. for testing
  end
end
2016-06-16 23:09:34 +05:30
# Returns the SQL fragment that calls the database's RANDOM() function.
# The fragment carries no surrounding whitespace, so callers can embed or
# compare it verbatim.
def self.random
  "RANDOM()"
end
2017-08-17 22:00:37 +05:30
# Returns the quoted SQL literal 't'.
# The fragment carries no surrounding whitespace, so callers can embed or
# compare it verbatim.
def self.true_value
  "'t'"
end
2017-08-17 22:00:37 +05:30
# Returns the quoted SQL literal 'f'.
# The fragment carries no surrounding whitespace, so callers can embed or
# compare it verbatim.
def self.false_value
  "'f'"
end
2016-04-02 18:10:28 +05:30
2018-03-17 18:26:18 +05:30
# Caps a timestamp at MAX_TIMESTAMP_VALUE.
#
# Returns the given timestamp itself when it lies strictly before the
# maximum, otherwise an unfrozen copy of MAX_TIMESTAMP_VALUE.
def self.sanitize_timestamp(timestamp)
  if MAX_TIMESTAMP_VALUE > timestamp
    timestamp
  else
    MAX_TIMESTAMP_VALUE.dup
  end
end
2022-05-07 20:08:51 +05:30
# Runs the given block with `uncached` applied on every base model returned
# by database_base_models_using_load_balancing, nested one inside another.
def self.all_uncached(&block)
  # Calls to #uncached only disable caching for the current connection. Since the load balancer
  # can potentially upgrade from read to read-write mode (using a different connection), we specify
  # up-front that we'll explicitly use the primary for the duration of the operation.
  Gitlab::Database::LoadBalancing::Session.current.use_primary do
    wrapped = block

    # Each pass wraps the callable built so far in one more `uncached` call,
    # so the model visited last ends up outermost.
    database_base_models_using_load_balancing.values.each do |model|
      inner = wrapped
      wrapped = -> { model.uncached(&inner) }
    end

    wrapped.call
  end
end
2021-10-27 15:23:28 +05:30
# Yields to the given block and returns its result; `url:` is required but
# not used here.
#
# The checking variant of this method is implemented in:
# spec/support/database/prevent_cross_joins.rb
def self.allow_cross_joins_across_databases(url:)
  yield
end
2018-12-05 23:21:45 +05:30
# Registers the `post_migrate` directory of every configured 'db' path as an
# additional migrations path, unless it is already registered.
#
# Skipped when SKIP_POST_DEPLOYMENT_MIGRATIONS is set, unless `force` is true.
def self.add_post_migrate_path_to_rails(force: false)
  return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force

  app_paths = Rails.application.config.paths

  app_paths['db'].each do |db_path|
    post_migrate_path = Rails.root.join(db_path, 'post_migrate').to_s

    unless app_paths['db/migrate'].include?(post_migrate_path)
      app_paths['db/migrate'] << post_migrate_path

      # Rails memoizes migrations at certain points where it won't read the above
      # path just yet. As such we must also update the following list of paths.
      ActiveRecord::Migrator.migrations_paths << post_migrate_path
    end
  end
end
2019-09-30 21:07:59 +05:30
2021-10-27 15:23:28 +05:30
# Names of the database configurations of the current Rails environment,
# with 'geo' removed.
def self.db_config_names
  env_configs = ::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env)

  env_configs.map(&:name) - ['geo']
end
2022-05-07 20:08:51 +05:30
# This returns all matching schemas that a given connection can use.
# Since the `ActiveRecord::Base` might change the connection (from main to ci),
# this does not look at literal connection names, but rather compares
# models that are holders for a given db_config_name.
#
# Returns an Array of schema names as Symbols, or nil when no db config can
# be determined for the connection.
def self.gitlab_schemas_for_connection(connection)
  db_config = db_config_for_connection(connection)

  # connection might not be yet adopted (returning NullPool, and no connection_klass)
  # in such cases it is fine to ignore such connections
  return unless db_config

  base_name = db_config.name.delete_suffix(LoadBalancing::LoadBalancer::REPLICA_SUFFIX)
  primary_model = database_base_models.fetch(base_name.to_sym)

  served_by_primary = lambda do |child_model|
    # The model might indicate a child connection, ensure that this is enclosed in a `db_config`
    child_model == primary_model ||
      database_base_models[db_config_share_with(child_model.connection_db_config)] == primary_model
  end

  schemas_to_base_models.filter_map do |schema, child_models|
    schema.to_sym if child_models.any?(&served_by_primary)
  end
end
2021-11-11 11:23:49 +05:30
# Returns the db config behind a connection, or nil when there is no
# connection or its pool exposes no db config.
def self.db_config_for_connection(connection)
  return unless connection

  # For a ConnectionProxy we want to avoid ambiguous db_config as it may
  # sometimes default to replica so we always return the primary config
  # instead.
  proxied = connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
  return connection.load_balancer.configuration.db_config if proxied

  # During application init we might receive `NullPool`
  pool = connection.respond_to?(:pool) ? connection.pool : nil

  pool.db_config if pool.respond_to?(:db_config)
end
# At the moment, the connection can only be retrieved by
# Gitlab::Database::LoadBalancer#read or #read_write or from the
# ActiveRecord directly. Therefore, if the load balancer doesn't
# recognize the connection, this method returns the primary role
# directly. In future, we may need to check for other sources.
# Expected returned names:
# main, main_replica, ci, ci_replica, unknown
#
# 'unknown' is returned when no db config (or no name) can be determined.
def self.db_config_name(connection)
  db_config_for_connection(connection)&.name || 'unknown'
end
2022-06-21 17:19:12 +05:30
# Currently the database configuration can only be shared with `main:`
# if the `database_tasks: false` is being used.
# This is to be refined: https://gitlab.com/gitlab-org/gitlab/-/issues/356580
#
# Returns nil when db_config has database tasks enabled (no sharing),
# otherwise 'main'.
def self.db_config_share_with(db_config)
  return if db_config.database_tasks? # no sharing

  'main' # share with `main:`
end
2021-10-27 15:23:28 +05:30
# Always false in this implementation.
def self.read_only?
  false
end
2021-10-27 15:23:28 +05:30
# Logical negation of read_only?.
def self.read_write?
  !read_only?
end
2023-04-23 21:23:45 +05:30
# Determines minimum viable migration version, determined by the timestamp
# of the earliest migration file.
#
# Scans the entries of `db/migrate` under Rails.root and takes the leading
# 14-digit prefix of each file name as its version; entries without such a
# prefix are ignored.
#
# Returns the smallest version as an Integer, or nil when no entry qualifies.
def self.read_minimum_migration_version
  # Dir.children returns the entry names as an Array and leaves no directory
  # handle open, unlike Dir.open called without a block.
  Dir.children(Rails.root.join('db/migrate'))
    .filter_map { |file_name| file_name[/\A\d{14}/]&.to_i }
    .min
end
2019-09-30 21:07:59 +05:30
# Monkeypatch rails with upgraded database observability:
# prepends ActiveRecordBaseTransactionMetrics onto ActiveRecord::Base.
def self.install_transaction_metrics_patches!
  ActiveRecord::Base.prepend(ActiveRecordBaseTransactionMetrics)
end
2021-10-27 15:23:28 +05:30
# Prepends TransactionManagerContext onto ActiveRecord's TransactionManager
# and RealTransactionContext onto its RealTransaction.
def self.install_transaction_context_patches!
  adapters = ActiveRecord::ConnectionAdapters

  adapters::TransactionManager.prepend(TransactionManagerContext)
  adapters::RealTransaction.prepend(RealTransactionContext)
end
2019-09-30 21:07:59 +05:30
# MonkeyPatch for ActiveRecord::Base for adding observability
module ActiveRecordBaseTransactionMetrics
  extend ActiveSupport::Concern

  class_methods do
    # A patch over ApplicationRecord.transaction that provides
    # observability into transactional methods.
    #
    # Classifies the call (see get_transaction_type), counts it as a
    # subtransaction when applicable, and runs the original `transaction`
    # inside a 'transaction.active_record' notification.
    def transaction(**options, &block)
      kind = get_transaction_type(connection.transaction_open?, options[:requires_new])

      if kind == :sub_transaction
        ::Gitlab::Database::Metrics.subtransactions_increment(name)
      end

      payload = { connection: connection, transaction_type: kind }

      ActiveSupport::Notifications.instrument('transaction.active_record', payload) do
        super(**options, &block)
      end
    end

    private

    # No transaction open yet       -> :real_transaction
    # Open, and requires_new given  -> :sub_transaction
    # Open, without requires_new    -> :fake_transaction
    def get_transaction_type(transaction_open, requires_new_flag)
      return :real_transaction unless transaction_open

      requires_new_flag ? :sub_transaction : :fake_transaction
    end
  end
end
2021-10-27 15:23:28 +05:30
# rubocop:disable Gitlab/ModuleWithInstanceVariables
# Adds `transaction_context` to the object it is prepended to; relies on
# that object keeping its transactions in `@stack`.
module TransactionManagerContext
  # Returns the `gitlab_transaction_context` of the first entry in @stack,
  # obtained through `try`.
  def transaction_context
    first_transaction = @stack.first
    first_transaction.try(:gitlab_transaction_context)
  end
end
# Attaches a ::Gitlab::Database::Transaction::Context to a transaction object
# and notifies it before the transaction's own commit / rollback runs.
module RealTransactionContext
  # Lazily created, then reused for the lifetime of the object.
  def gitlab_transaction_context
    return @gitlab_transaction_context if @gitlab_transaction_context

    @gitlab_transaction_context = ::Gitlab::Database::Transaction::Context.new
  end

  # Informs the context, then delegates to the original commit.
  def commit
    gitlab_transaction_context.commit

    super
  end

  # Informs the context, then delegates to the original rollback.
  def rollback
    gitlab_transaction_context.rollback

    super
  end
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
2015-10-24 18:46:33 +05:30
end
end
2019-12-04 20:38:33 +05:30
2021-06-08 01:23:25 +05:30
# Extension hook: `prepend_mod_with` is defined outside this file and
# presumably prepends an edition-specific module registered under this name.
Gitlab::Database.prepend_mod_with('Gitlab::Database')