# frozen_string_literal: true

module Gitlab
  # Central registry of database names, limits, base models and small SQL
  # helpers used across the application, plus observability patches that are
  # prepended onto ActiveRecord.
  module Database
    DATABASE_NAMES = %w[main ci main_clusterwide].freeze

    MAIN_DATABASE_NAME = 'main'
    CI_DATABASE_NAME = 'ci'
    DEFAULT_POOL_HEADROOM = 10

    # This constant is used when renaming tables concurrently.
    # If you plan to rename a table using the `rename_table_safely` method, add your table here one milestone before the rename.
    # Example:
    # TABLES_TO_BE_RENAMED = {
    #   'old_name' => 'new_name'
    # }.freeze
    TABLES_TO_BE_RENAMED = {}.freeze

    # Minimum PostgreSQL version requirement per documentation:
    # https://docs.gitlab.com/ee/install/requirements.html#postgresql-requirements
    MINIMUM_POSTGRES_VERSION = 12

    # https://www.postgresql.org/docs/9.2/static/datatype-numeric.html
    MAX_INT_VALUE = 2147483647
    MIN_INT_VALUE = -2147483648

    # The max value between MySQL's TIMESTAMP and PostgreSQL's timestamptz:
    # https://www.postgresql.org/docs/9.1/static/datatype-datetime.html
    # https://dev.mysql.com/doc/refman/5.7/en/datetime.html
    # FIXME: this should just be the max value of timestamptz
    MAX_TIMESTAMP_VALUE = Time.at((1 << 31) - 1).freeze

    # The maximum number of characters for text fields, to avoid DoS attacks via parsing huge text fields
    # https://gitlab.com/gitlab-org/gitlab-foss/issues/61974
    MAX_TEXT_SIZE_LIMIT = 1_000_000

    # Migrations before this version may have been removed
    MIN_SCHEMA_GITLAB_VERSION = '15.0'

    # Schema we store dynamically managed partitions in (e.g. for time partitioning)
    DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic

    # Schema we store static partitions in (e.g. for hash partitioning)
    STATIC_PARTITIONS_SCHEMA = :gitlab_partitions_static

    # This is an extensive list of postgres schemas owned by GitLab
    # It does not include the default public schema
    EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze

    PRIMARY_DATABASE_NAME = ActiveRecord::Base.connection_db_config.name.to_sym # rubocop:disable Database/MultipleDatabases

    # NOTE(review): `^`/`$` anchor per line, not per string, so a multi-line
    # input with one matching line would match. `\A`/`\z` would be stricter;
    # confirm against callers before changing.
    FULLY_QUALIFIED_IDENTIFIER = /^\w+\.\w+$/

    ## Database Modes
    MODE_SINGLE_DATABASE = "single-database"
    MODE_SINGLE_DATABASE_CI_CONNECTION = "single-database-ci-connection"
    MODE_MULTIPLE_DATABASES = "multiple-databases"

    # Returns a memoized, frozen, indifferent-access hash mapping a database
    # name to the base model class holding its connection. The
    # `main_clusterwide` and `ci` entries are only present when the
    # corresponding ApplicationRecord reports `connection_class?`.
    def self.database_base_models
      @database_base_models ||= {
        # Note that we use ActiveRecord::Base here and not ApplicationRecord.
        # This is deliberate, as we also use these classes to apply load
        # balancing to, and the load balancer must be enabled for _all_ models
        # that inherit from ActiveRecord::Base; not just our own models that
        # inherit from ApplicationRecord.
        main: ::ActiveRecord::Base,
        main_clusterwide: ::MainClusterwide::ApplicationRecord.connection_class? ? ::MainClusterwide::ApplicationRecord : nil,
        ci: ::Ci::ApplicationRecord.connection_class? ? ::Ci::ApplicationRecord : nil
      }.compact.with_indifferent_access.freeze
    end

    # This returns a list of databases that contains all the gitlab_shared schema
    # tables. We can't reuse database_base_models because Geo does not support
    # the gitlab_shared tables yet.
    def self.database_base_models_with_gitlab_shared
      @database_base_models_with_gitlab_shared ||= {
        # Note that we use ActiveRecord::Base here and not ApplicationRecord.
        # This is deliberate, as we also use these classes to apply load
        # balancing to, and the load balancer must be enabled for _all_ models
        # that inherit from ActiveRecord::Base; not just our own models that
        # inherit from ApplicationRecord.
        main: ::ActiveRecord::Base,
        main_clusterwide: ::MainClusterwide::ApplicationRecord.connection_class? ? ::MainClusterwide::ApplicationRecord : nil,
        ci: ::Ci::ApplicationRecord.connection_class? ? ::Ci::ApplicationRecord : nil
      }.compact.with_indifferent_access.freeze
    end

    # This returns a list of databases whose connection supports database load
    # balancing. We can't reuse the database_base_models method because the Geo
    # database does not support load balancing yet.
    #
    # TODO: https://gitlab.com/gitlab-org/geo-team/discussions/-/issues/5032
    def self.database_base_models_using_load_balancing
      @database_base_models_using_load_balancing ||= {
        # Note that we use ActiveRecord::Base here and not ApplicationRecord.
        # This is deliberate, as we also use these classes to apply load
        # balancing to, and the load balancer must be enabled for _all_ models
        # that inherit from ActiveRecord::Base; not just our own models that
        # inherit from ApplicationRecord.
        main: ::ActiveRecord::Base,
        main_clusterwide: ::MainClusterwide::ApplicationRecord.connection_class? ? ::MainClusterwide::ApplicationRecord : nil,
        ci: ::Ci::ApplicationRecord.connection_class? ? ::Ci::ApplicationRecord : nil
      }.compact.with_indifferent_access.freeze
    end

    # This returns a list of base models with connection associated for a given gitlab_schema
    def self.schemas_to_base_models
      @schemas_to_base_models ||= {
        gitlab_main: [self.database_base_models.fetch(:main)],
        gitlab_ci: [self.database_base_models[:ci] || self.database_base_models.fetch(:main)], # use CI or fallback to main
        gitlab_shared: database_base_models_with_gitlab_shared.values, # all models
        gitlab_internal: database_base_models.values, # all models
        gitlab_pm: [self.database_base_models.fetch(:main)], # package metadata models
        gitlab_main_clusterwide: [self.database_base_models[:main_clusterwide] || self.database_base_models.fetch(:main)]
      }.with_indifferent_access.freeze
    end

    # Returns the static list of known database names (DATABASE_NAMES).
    def self.all_database_names
      DATABASE_NAMES
    end

    # We configure the database connection pool size automatically based on the
    # configured concurrency. We also add some headroom, to make sure we don't
    # run out of connections when more threads besides the 'user-facing' ones
    # are running.
    #
    # The headroom is read from the DB_POOL_HEADROOM environment variable and
    # falls back to DEFAULT_POOL_HEADROOM when it is unset or blank.
    #
    # Read more about this in
    # doc/development/database/client_side_connection_pool.md
    def self.default_pool_size
      headroom =
        (ENV["DB_POOL_HEADROOM"].presence || DEFAULT_POOL_HEADROOM).to_i

      Gitlab::Runtime.max_threads + headroom
    end

    # Database configured. Returns true even if the database is shared
    def self.has_config?(database_name)
      ActiveRecord::Base.configurations
        .configs_for(env_name: Rails.env, name: database_name.to_s, include_replicas: true)
        .present?
    end

    # Database configured. Returns false if the database is shared
    def self.has_database?(database_name)
      db_config = ::Gitlab::Database.database_base_models[database_name]&.connection_db_config
      db_config.present? && db_config_share_with(db_config).nil?
    end

    # Returns one of the MODE_* constants:
    # - MODE_SINGLE_DATABASE when there is no `ci` configuration at all,
    # - MODE_MULTIPLE_DATABASES when `ci` is configured as its own database,
    # - MODE_SINGLE_DATABASE_CI_CONNECTION when `ci` is configured but shared.
    def self.database_mode
      if !has_config?(CI_DATABASE_NAME)
        MODE_SINGLE_DATABASE
      elsif has_database?(CI_DATABASE_NAME)
        MODE_MULTIPLE_DATABASES
      else
        MODE_SINGLE_DATABASE_CI_CONNECTION
      end
    end

    # Model over the `pg_user` relation, keyed by `usename`.
    class PgUser < ApplicationRecord
      self.table_name = 'pg_user'
      self.primary_key = :usename
    end

    # Logs the current database account and raises when it is a superuser.
    # An ActiveRecord::StatementInvalid raised along the way is re-raised as
    # 'User CURRENT_USER not found'.
    #
    # rubocop: disable CodeReuse/ActiveRecord
    def self.check_for_non_superuser
      user = PgUser.find_by('usename = CURRENT_USER')
      am_i_superuser = user.usesuper

      Gitlab::AppLogger.info(
        "Account details: User: \"#{user.usename}\", UseSuper: (#{am_i_superuser})"
      )

      raise 'Error: detected superuser' if am_i_superuser
    rescue ActiveRecord::StatementInvalid
      raise 'User CURRENT_USER not found'
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Prints a warning banner on stderr for every configured database whose
    # PostgreSQL version is below the minimum supported one. Does nothing when
    # running under `rails runner`.
    def self.check_postgres_version_and_print_warning
      return if Gitlab::Runtime.rails_runner?

      database_base_models.each do |name, model|
        database = Gitlab::Database::Reflection.new(model)

        next if database.postgresql_minimum_supported_version?

        Kernel.warn ERB.new(Rainbow.new.wrap(<<~EOS).red).result

                    ██     ██  █████  ██████  ███    ██ ██ ███    ██  ██████
                    ██     ██ ██   ██ ██   ██ ████   ██ ██ ████   ██ ██
                    ██  █  ██ ███████ ██████  ██ ██  ██ ██ ██ ██  ██ ██   ███
                    ██ ███ ██ ██   ██ ██   ██ ██  ██ ██ ██ ██  ██ ██ ██    ██
                     ███ ███  ██   ██ ██   ██ ██   ████ ██ ██   ████  ██████

          ******************************************************************************
            You are using PostgreSQL #{database.version} for the #{name} database, but PostgreSQL >= <%= Gitlab::Database::MINIMUM_POSTGRES_VERSION %>
            is required for this version of GitLab.
            <% if Rails.env.development? || Rails.env.test? %>
            If using gitlab-development-kit, please find the relevant steps here:
              https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/main/doc/howto/postgresql.md#upgrade-postgresql
            <% end %>
            Please upgrade your environment to a supported PostgreSQL version, see
            https://docs.gitlab.com/ee/install/requirements.html#database for details.
          ******************************************************************************
        EOS
      rescue ActiveRecord::ActiveRecordError, PG::Error
        # ignore - happens when Rake tasks have yet to create a database, e.g. for testing
      end
    end

    # SQL fragment for a random value.
    def self.random
      "RANDOM()"
    end

    # SQL literal used for boolean true.
    def self.true_value
      "'t'"
    end

    # SQL literal used for boolean false.
    def self.false_value
      "'f'"
    end

    # Caps `timestamp` at MAX_TIMESTAMP_VALUE. Values strictly below the cap
    # are returned unchanged; otherwise an unfrozen copy of the cap is returned.
    def self.sanitize_timestamp(timestamp)
      MAX_TIMESTAMP_VALUE > timestamp ? timestamp : MAX_TIMESTAMP_VALUE.dup
    end

    # Runs the block with query caching disabled on every load-balanced base
    # model, by nesting one `uncached` call per model around the block.
    def self.all_uncached(&block)
      # Calls to #uncached only disable caching for the current connection. Since the load balancer
      # can potentially upgrade from read to read-write mode (using a different connection), we specify
      # up-front that we'll explicitly use the primary for the duration of the operation.
      Gitlab::Database::LoadBalancing::Session.current.use_primary do
        base_models = database_base_models_using_load_balancing.values
        base_models.reduce(block) { |blk, model| -> { model.uncached(&blk) } }.call
      end
    end

    # Yields to the given block. `url:` is required but not used here.
    def self.allow_cross_joins_across_databases(url:)
      # this method is implemented in:
      # spec/support/database/prevent_cross_joins.rb

      yield
    end

    # Appends each `<db path>/post_migrate` directory to the Rails migration
    # paths unless it is already present. Skipped when the
    # SKIP_POST_DEPLOYMENT_MIGRATIONS environment variable is set, unless
    # `force` is true.
    def self.add_post_migrate_path_to_rails(force: false)
      return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force

      Rails.application.config.paths['db'].each do |db_path|
        path = Rails.root.join(db_path, 'post_migrate').to_s

        next if Rails.application.config.paths['db/migrate'].include? path

        Rails.application.config.paths['db/migrate'] << path

        # Rails memoizes migrations at certain points where it won't read the above
        # path just yet. As such we must also update the following list of paths.
        ActiveRecord::Migrator.migrations_paths << path
      end
    end

    # Names of the database configurations for the current Rails environment,
    # excluding 'geo'.
    def self.db_config_names
      ::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name) - ['geo']
    end

    # This returns all matching schemas that a given connection can use
    # Since the `ActiveRecord::Base` might change the connection (from main to ci)
    # This does not look at literal connection names, but rather compares
    # models that are holders for a given db_config_name
    #
    # Returns an array of schema names as symbols, or nil when no db_config can
    # be resolved for the connection.
    def self.gitlab_schemas_for_connection(connection)
      db_config = self.db_config_for_connection(connection)

      # connection might not be yet adopted (returning NullPool, and no connection_klass)
      # in such cases it is fine to ignore such connections
      return unless db_config

      db_config_name = db_config.name.delete_suffix(LoadBalancing::LoadBalancer::REPLICA_SUFFIX)

      primary_model = self.database_base_models.fetch(db_config_name.to_sym)

      self.schemas_to_base_models.select do |_, child_models|
        child_models.any? do |child_model|
          child_model == primary_model ||
            # The model might indicate a child connection, ensure that this is enclosed in a `db_config`
            self.database_base_models[self.db_config_share_with(child_model.connection_db_config)] == primary_model
        end
      end.keys.map!(&:to_sym)
    end

    # Resolves the db_config for a connection, or nil when there is no
    # connection or its pool does not expose one.
    def self.db_config_for_connection(connection)
      return unless connection

      # For a ConnectionProxy we want to avoid ambiguous db_config as it may
      # sometimes default to replica so we always return the primary config
      # instead.
      if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
        return connection.load_balancer.configuration.db_config
      end

      # During application init we might receive `NullPool`
      return unless connection.respond_to?(:pool) && connection.pool.respond_to?(:db_config)

      connection.pool.db_config
    end

    # At the moment, the connection can only be retrieved by
    # Gitlab::Database::LoadBalancer#read or #read_write or from the
    # ActiveRecord directly. Therefore, if the load balancer doesn't
    # recognize the connection, this method returns the primary role
    # directly. In future, we may need to check for other sources.
    # Expected returned names:
    # main, main_replica, ci, ci_replica, unknown
    def self.db_config_name(connection)
      db_config = db_config_for_connection(connection)
      db_config&.name || 'unknown'
    end

    # Currently the database configuration can only be shared with `main:`
    # If the `database_tasks: false` is being used
    # This is to be refined: https://gitlab.com/gitlab-org/gitlab/-/issues/356580
    def self.db_config_share_with(db_config)
      if db_config.database_tasks?
        nil # no sharing
      else
        'main' # share with `main:`
      end
    end

    # Always false in this module.
    def self.read_only?
      false
    end

    # Negation of `read_only?`.
    def self.read_write?
      !read_only?
    end

    # Determines minimum viable migration version, determined by the timestamp
    # of the earliest migration file.
    #
    # Returns the smallest 14-digit filename prefix in `db/migrate` as an
    # Integer, or nil when no entry starts with 14 digits.
    def self.read_minimum_migration_version
      # Dir.children returns the entry names without keeping a directory
      # handle open (a bare Dir.open without a block is never closed).
      Dir.children(Rails.root.join('db/migrate'))
        .filter_map { |entry| entry[/\A\d{14}/]&.to_i }
        .min
    end

    # Monkeypatch rails with upgraded database observability
    def self.install_transaction_metrics_patches!
      ActiveRecord::Base.prepend(ActiveRecordBaseTransactionMetrics)
    end

    # Prepends the transaction context modules onto ActiveRecord's
    # TransactionManager and RealTransaction.
    def self.install_transaction_context_patches!
      ActiveRecord::ConnectionAdapters::TransactionManager
        .prepend(TransactionManagerContext)
      ActiveRecord::ConnectionAdapters::RealTransaction
        .prepend(RealTransactionContext)
    end

    # MonkeyPatch for ActiveRecord::Base for adding observability
    module ActiveRecordBaseTransactionMetrics
      extend ActiveSupport::Concern

      class_methods do
        # A patch over ApplicationRecord.transaction that provides
        # observability into transactional methods.
        def transaction(**options, &block)
          transaction_type = get_transaction_type(connection.transaction_open?, options[:requires_new])

          ::Gitlab::Database::Metrics.subtransactions_increment(self.name) if transaction_type == :sub_transaction

          payload = { connection: connection, transaction_type: transaction_type }

          ActiveSupport::Notifications.instrument('transaction.active_record', payload) do
            super(**options, &block)
          end
        end

        private

        # Classifies a transaction call: with a transaction already open it is
        # a :sub_transaction when `requires_new` was requested and a
        # :fake_transaction otherwise; with none open it is a :real_transaction.
        def get_transaction_type(transaction_open, requires_new_flag)
          if transaction_open
            return :sub_transaction if requires_new_flag

            return :fake_transaction
          end

          :real_transaction
        end
      end
    end

    # rubocop:disable Gitlab/ModuleWithInstanceVariables
    module TransactionManagerContext
      # Context of the first transaction on the stack, or nil when that entry
      # is absent or does not respond to `gitlab_transaction_context`.
      def transaction_context
        @stack.first.try(:gitlab_transaction_context)
      end
    end

    module RealTransactionContext
      # Lazily created per-transaction context object.
      def gitlab_transaction_context
        @gitlab_transaction_context ||= ::Gitlab::Database::Transaction::Context.new
      end

      # Notifies the context before delegating to the original commit.
      def commit
        gitlab_transaction_context.commit

        super
      end

      # Notifies the context before delegating to the original rollback.
      def rollback
        gitlab_transaction_context.rollback

        super
      end
    end
    # rubocop:enable Gitlab/ModuleWithInstanceVariables
  end
end

Gitlab::Database.prepend_mod_with('Gitlab::Database')