debian-mirror-gitlab/spec/db/schema_spec.rb

405 lines
18 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
require 'spec_helper'
2019-10-12 21:52:04 +05:30
require Rails.root.join('ee', 'spec', 'db', 'schema_support') if Gitlab.ee?
2019-02-15 15:39:39 +05:30
2023-03-17 16:20:25 +05:30
RSpec.describe 'Database schema', feature_category: :database do
2021-06-08 01:23:25 +05:30
prepend_mod_with('DB::SchemaSupport')
2019-12-04 20:38:33 +05:30
2019-02-15 15:39:39 +05:30
# Table names as reported by `connection.tables`.
# NOTE(review): `connection` is not defined at this level in the visible code —
# presumably supplied elsewhere (e.g. by the prepended support module); confirm.
let(:tables) { connection.tables }
2020-07-28 23:09:34 +05:30
# Rows describing the jsonb columns, as returned by `retrieve_columns_name_with_jsonb`.
let(:columns_name_with_jsonb) { retrieve_columns_name_with_jsonb }
2019-02-15 15:39:39 +05:30
2022-06-21 17:19:12 +05:30
# Foreign key columns, keyed by table name, that are subtracted from the list
# of columns the "all foreign keys are indexed" example requires an index for.
IGNORED_INDEXES_ON_FKS = {
  slack_integrations_scopes: %w[slack_api_scope_id],
  # Composable FKs: the columns are reversed in the index definition.
  p_ci_builds_metadata: %w[partition_id],
  p_ci_runner_machine_builds: %w[partition_id]
}.with_indifferent_access.freeze
2023-01-13 00:05:48 +05:30
# Table names subtracted from `connection.tables` before the per-table example
# groups are generated, i.e. these tables are not checked individually.
TABLE_PARTITIONS = %w[ci_builds_metadata].freeze
2023-06-20 00:43:36 +05:30
# If splitting FK and table removal into two MRs as suggested in the docs, use this constant in the initial FK removal MR.
# In the subsequent table removal MR, remove the entries.
# See: https://docs.gitlab.com/ee/development/migration_style_guide.html#dropping-a-database-table
#
# Keyed by table name; each value lists the `_id` columns whose foreign key was
# removed. `ignored_fk_columns` merges these entries with IGNORED_FK_COLUMNS.
REMOVED_FKS = {
  clusters_applications_cert_managers: %w[cluster_id],
  clusters_applications_cilium: %w[cluster_id],
  clusters_applications_crossplane: %w[cluster_id],
  clusters_applications_helm: %w[cluster_id],
  clusters_applications_ingress: %w[cluster_id],
  clusters_applications_jupyter: %w[cluster_id oauth_application_id],
  clusters_applications_knative: %w[cluster_id],
  clusters_applications_prometheus: %w[cluster_id],
  clusters_applications_runners: %w[cluster_id],
  serverless_domain_cluster: %w[clusters_applications_knative_id creator_id pages_domain_id]
}.with_indifferent_access.freeze
2020-07-28 23:09:34 +05:30
# List of columns historically missing a FK, don't add more columns
# See: https://docs.gitlab.com/ee/development/database/foreign_keys.html#naming-foreign-keys
#
# Keyed by table name; each value lists the `_id` columns of that table that
# the "columns ending with _id" examples do not require a foreign key for.
IGNORED_FK_COLUMNS = {
  abuse_reports: %w[reporter_id user_id],
  application_settings: %w[
    performance_bar_allowed_group_id slack_app_id snowplow_app_id eks_account_id eks_access_key_id
  ],
  approvals: %w[user_id],
  approver_groups: %w[target_id],
  approvers: %w[target_id user_id],
  analytics_cycle_analytics_aggregations: %w[
    last_full_issues_id
    last_full_merge_requests_id
    last_incremental_issues_id
    last_full_run_issues_id
    last_full_run_merge_requests_id
    last_incremental_merge_requests_id
    last_consistency_check_issues_stage_event_hash_id
    last_consistency_check_issues_issuable_id
    last_consistency_check_merge_requests_stage_event_hash_id
    last_consistency_check_merge_requests_issuable_id
  ],
  analytics_cycle_analytics_merge_request_stage_events: %w[
    author_id group_id merge_request_id milestone_id project_id stage_event_hash_id state_id
  ],
  analytics_cycle_analytics_issue_stage_events: %w[
    author_id group_id issue_id milestone_id project_id stage_event_hash_id state_id
  ],
  audit_events: %w[author_id entity_id target_id],
  award_emoji: %w[awardable_id user_id],
  aws_roles: %w[role_external_id],
  boards: %w[milestone_id iteration_id],
  broadcast_messages: %w[namespace_id],
  chat_names: %w[chat_id team_id user_id integration_id],
  chat_teams: %w[team_id],
  ci_build_needs: %w[partition_id build_id],
  ci_build_pending_states: %w[partition_id build_id],
  ci_build_report_results: %w[partition_id build_id],
  ci_build_trace_chunks: %w[partition_id build_id],
  ci_build_trace_metadata: %w[partition_id build_id],
  ci_builds: %w[erased_by_id trigger_request_id partition_id],
  ci_builds_runner_session: %w[partition_id build_id],
  p_ci_builds_metadata: %w[partition_id build_id runner_machine_id],
  ci_job_artifacts: %w[partition_id job_id],
  ci_job_variables: %w[partition_id job_id],
  ci_namespace_monthly_usages: %w[namespace_id],
  ci_pending_builds: %w[partition_id build_id],
  ci_pipeline_variables: %w[partition_id],
  ci_pipelines: %w[partition_id],
  ci_resources: %w[partition_id build_id],
  ci_runner_projects: %w[runner_id],
  ci_running_builds: %w[partition_id build_id],
  ci_sources_pipelines: %w[partition_id source_partition_id source_job_id],
  ci_stages: %w[partition_id],
  ci_trigger_requests: %w[commit_id],
  ci_unit_test_failures: %w[partition_id build_id],
  cluster_providers_aws: %w[security_group_id vpc_id access_key_id],
  cluster_providers_gcp: %w[gcp_project_id operation_id],
  compliance_management_frameworks: %w[group_id],
  commit_user_mentions: %w[commit_id],
  dep_ci_build_trace_sections: %w[build_id],
  deploy_keys_projects: %w[deploy_key_id],
  deployments: %w[deployable_id user_id],
  draft_notes: %w[discussion_id commit_id],
  epics: %w[updated_by_id last_edited_by_id state_id],
  events: %w[target_id],
  forked_project_links: %w[forked_from_project_id],
  geo_event_log: %w[hashed_storage_attachments_event_id],
  geo_node_statuses: %w[last_event_id cursor_last_event_id],
  geo_nodes: %w[oauth_application_id],
  geo_repository_deleted_events: %w[project_id],
  ghost_user_migrations: %w[initiator_user_id],
  gitlab_subscription_histories: %w[gitlab_subscription_id hosted_plan_id namespace_id],
  identities: %w[user_id],
  import_failures: %w[project_id],
  issues: %w[last_edited_by_id state_id],
  issue_emails: %w[email_message_id],
  jira_tracker_data: %w[jira_issue_transition_id],
  keys: %w[user_id],
  label_links: %w[target_id],
  ldap_group_links: %w[group_id],
  members: %w[source_id created_by_id],
  merge_requests: %w[last_edited_by_id state_id],
  merge_requests_compliance_violations: %w[target_project_id],
  merge_request_diff_commits: %w[commit_author_id committer_id],
  namespaces: %w[owner_id parent_id],
  notes: %w[
    author_id commit_id noteable_id updated_by_id resolved_by_id confirmed_by_id discussion_id
  ],
  notification_settings: %w[source_id],
  oauth_access_grants: %w[resource_owner_id application_id],
  oauth_access_tokens: %w[resource_owner_id application_id],
  oauth_applications: %w[owner_id],
  p_ci_runner_machine_builds: %w[partition_id build_id],
  product_analytics_events_experimental: %w[event_id txn_id user_id],
  project_build_artifacts_size_refreshes: %w[last_job_artifact_id],
  project_data_transfers: %w[project_id namespace_id],
  project_error_tracking_settings: %w[sentry_project_id],
  project_statistics: %w[namespace_id],
  projects: %w[ci_id mirror_user_id],
  redirect_routes: %w[source_id],
  repository_languages: %w[programming_language_id],
  routes: %w[source_id],
  sent_notifications: %w[project_id noteable_id recipient_id commit_id in_reply_to_discussion_id],
  slack_integrations: %w[team_id user_id bot_user_id], # these are external Slack IDs
  snippets: %w[author_id],
  spam_logs: %w[user_id],
  status_check_responses: %w[external_approval_rule_id],
  subscriptions: %w[user_id subscribable_id],
  suggestions: %w[commit_id],
  taggings: %w[tag_id taggable_id tagger_id],
  timelogs: %w[user_id],
  todos: %w[target_id commit_id],
  uploads: %w[model_id],
  user_agent_details: %w[subject_id],
  users: %w[color_scheme_id created_by_id theme_id email_opted_in_source_id],
  users_star_projects: %w[user_id],
  vulnerability_identifiers: %w[external_id],
  vulnerability_scanners: %w[external_id],
  security_scans: %w[pipeline_id], # foreign key is not added as ci_pipeline table will be moved into different db soon
  vulnerability_reads: %w[cluster_agent_id],
  # See: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/87584
  # Fixes performance issues with the deletion of web-hooks with many log entries
  web_hook_logs: %w[web_hook_id],
  ml_candidates: %w[internal_id]
}.with_indifferent_access.freeze
context 'for table' do
  # Generates one example group per table for every database connection.
  # Tables listed in TABLE_PARTITIONS are skipped, as are tables whose GitLab
  # schema is not among the schemas served by the connection.
  Gitlab::Database::EachDatabase.each_database_connection do |connection, _|
    schemas_for_connection = Gitlab::Database.gitlab_schemas_for_connection(connection)

    (connection.tables - TABLE_PARTITIONS).sort.each do |table|
      table_schema = Gitlab::Database::GitlabSchema.table_schema(table)
      next unless schemas_for_connection.include?(table_schema)

      describe table do
        let(:indexes) { connection.indexes(table) }
        let(:columns) { connection.columns(table) }
        let(:foreign_keys) { connection.foreign_keys(table) }
        let(:loose_foreign_keys) { Gitlab::Database::LooseForeignKeys.definitions.group_by(&:from_table).fetch(table, []) }
        let(:all_foreign_keys) { foreign_keys + loose_foreign_keys }
        # take the first column in case we're using a composite primary key
        let(:primary_key_column) { Array(connection.primary_key(table)).first }

        context 'all foreign keys' do
          # for index to be effective, the FK constraint has to be at first place
          it 'are indexed' do
            first_indexed_column = indexes.filter_map do |index|
              index_columns = index.columns

              # In cases of complex composite indexes, a string is returned eg:
              # "lower((extern_uid)::text), group_id"
              index_columns = index_columns.split(',') if index_columns.is_a?(String)
              column = index_columns.first.chomp

              # A partial index is not suitable for a foreign key column, unless
              # the only condition is for the presence of the foreign key itself
              column if index.where.nil? || index.where == "(#{column} IS NOT NULL)"
            end

            foreign_keys_columns = all_foreign_keys.map(&:column)
            required_indexed_columns = foreign_keys_columns - ignored_index_columns(table)

            # Add the primary key column to the list of indexed columns because
            # postgres and mysql both automatically create an index on the primary
            # key. Also, the rails connection.indexes() method does not return
            # automatically generated indexes (like the primary key index).
            first_indexed_column.push(primary_key_column)

            expect(first_indexed_column.uniq).to include(*required_indexed_columns)
          end
        end

        context 'columns ending with _id' do
          let(:column_names) { columns.map(&:name) }
          let(:column_names_with_id) { column_names.select { |column_name| column_name.ends_with?('_id') } }
          let(:ignored_columns) { ignored_fk_columns(table) }
          # Column names covered by a (loose) foreign key, excluding FKs whose
          # name ends with "_p" or "_id_convert_to_bigint".
          let(:foreign_keys_columns) do
            all_foreign_keys
              .reject { |fk| fk.name&.end_with?("_p") || fk.name&.end_with?("_id_convert_to_bigint") }
              .map(&:column)
              .uniq # we can have FK and loose FK present at the same time
          end

          it 'do have the foreign keys' do
            expect(column_names_with_id - ignored_columns).to match_array(foreign_keys_columns)
          end

          it 'and having foreign key are not in the ignore list' do
            # Compare column names with column names. Subtracting the foreign key
            # definition objects from an array of strings never removes anything,
            # which would make this expectation always pass.
            expect(ignored_columns).to match_array(ignored_columns - foreign_keys_columns)
          end
        end
      end
    end
  end
end
2019-12-26 22:10:19 +05:30
# These pre-existing enums have limits > 2 bytes
#
# Keyed by model class name; each value lists the enum attributes of that model
# that are excluded from the smallint check in the 'for enums' examples.
IGNORED_LIMIT_ENUMS = {
  'Analytics::CycleAnalytics::Stage' => %w[start_event_identifier end_event_identifier],
  'Ci::Bridge' => %w[failure_reason],
  'Ci::Build' => %w[failure_reason],
  'Ci::BuildMetadata' => %w[timeout_source],
  'Ci::BuildTraceChunk' => %w[data_store],
  'Ci::DailyReportResult' => %w[param_type],
  'Ci::JobArtifact' => %w[file_type],
  'Ci::Pipeline' => %w[source config_source failure_reason],
  'Ci::Processable' => %w[failure_reason],
  'Ci::Runner' => %w[access_level],
  'Ci::Stage' => %w[status],
  'Clusters::Cluster' => %w[platform_type provider_type],
  'CommitStatus' => %w[failure_reason],
  'GenericCommitStatus' => %w[failure_reason],
  'Gitlab::DatabaseImporters::CommonMetrics::PrometheusMetric' => %w[group],
  'InternalId' => %w[usage],
  'List' => %w[list_type],
  'NotificationSetting' => %w[level],
  'Project' => %w[auto_cancel_pending_pipelines],
  'ProjectAutoDevops' => %w[deploy_strategy],
  'PrometheusMetric' => %w[group],
  'ResourceLabelEvent' => %w[action],
  'User' => %w[layout dashboard project_view],
  'Users::Callout' => %w[feature_name],
  'PrometheusAlert' => %w[operator]
}.freeze
2022-06-21 17:19:12 +05:30
context 'for enums', :eager_load do
  # skip model if it is an abstract class as it would not have an associated DB table
  let(:models) { ApplicationRecord.descendants.reject(&:abstract_class?) }

  # Every enum a model defines, minus the ones listed for that model in
  # IGNORED_LIMIT_ENUMS, is handed to the `use_smallint_for_enums` matcher.
  it 'uses smallint for enums in all models', :aggregate_failures do
    models.each do |model|
      ignored_enums = ignored_limit_enums(model.name)
      enums = model.defined_enums.keys - ignored_enums

      expect(model).to use_smallint_for_enums(enums)
    end
  end
end
2020-07-28 23:09:34 +05:30
# These pre-existing columns do not use a schema validation yet
#
# Keyed by model class name; each value lists the jsonb columns of that model
# that are excluded from the 'for jsonb columns' schema-validator check.
IGNORED_JSONB_COLUMNS = {
  "ApplicationSetting" => %w[repository_storages_weighted],
  "AlertManagement::Alert" => %w[payload],
  "Ci::BuildMetadata" => %w[config_options config_variables],
  "Ci::BuildMetadata::Partitioned" => %w[
    config_options config_variables id_tokens runtime_runner_features secrets
  ],
  "ExperimentSubject" => %w[context],
  "ExperimentUser" => %w[context],
  "Geo::Event" => %w[payload],
  "GeoNodeStatus" => %w[status],
  "Operations::FeatureFlagScope" => %w[strategies],
  "Operations::FeatureFlags::Strategy" => %w[parameters],
  "Packages::Composer::Metadatum" => %w[composer_json],
  "RawUsageData" => %w[payload], # Usage data payload changes often, we cannot use one schema
  "Releases::Evidence" => %w[summary],
  "Vulnerabilities::Finding::Evidence" => %w[data], # Validation work in progress
  "EE::Gitlab::BackgroundMigration::FixSecurityScanStatuses::SecurityScan" => %w[info] # This is a migration model
}.freeze
# We are skipping GEO models for now as they add complexity
describe 'for jsonb columns' do
  # For every jsonb column row, each model mapped to the row's table must pass
  # the `validate_jsonb_schema` matcher for that column, unless the column is
  # listed for the model in IGNORED_JSONB_COLUMNS.
  it 'uses json schema validator', :eager_load do
    columns_name_with_jsonb.each do |hash|
      models = models_by_table_name[hash["table_name"]]
      # Tables without any model are skipped.
      next if models.nil?

      models.each do |model|
        jsonb_columns = [hash["column_name"]] - ignored_jsonb_columns(model.name)

        expect(model).to validate_jsonb_schema(jsonb_columns)
      end
    end
  end
end
context 'existence of Postgres schemas' do
  # Returns the names of the schemas in the current database, excluding
  # `information_schema` and any schema whose name starts with `pg_`
  # (case-insensitively). Queried through ApplicationRecord's connection.
  def get_schemas
    sql = <<~SQL
      SELECT schema_name FROM
      information_schema.schemata
      WHERE
      NOT schema_name ~* '^pg_' AND NOT schema_name = 'information_schema'
      AND catalog_name = current_database()
    SQL

    ApplicationRecord.connection.select_all(sql).map do |row|
      row['schema_name']
    end
  end

  it 'we have a public schema' do
    expect(get_schemas).to include('public')
  end

  Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
    it "we have a '#{schema}' schema" do
      expect(get_schemas).to include(schema.to_s)
    end
  end

  # The expected total is every extra schema plus `public`.
  it 'we do not have unexpected schemas' do
    expect(get_schemas.size).to eq(Gitlab::Database::EXTRA_SCHEMAS.size + 1)
  end
end
2021-01-03 14:25:43 +05:30
context 'primary keys' do
  # For each database connection, collects the tables that belong to one of the
  # connection's GitLab schemas and have no primary key; none are expected.
  it 'expects every table to have a primary key defined' do
    Gitlab::Database::EachDatabase.each_database_connection do |connection, _|
      schemas_for_connection = Gitlab::Database.gitlab_schemas_for_connection(connection)

      tables_without_pk = connection.tables.select do |table|
        table_schema = Gitlab::Database::GitlabSchema.table_schema(table)
        schemas_for_connection.include?(table_schema) && connection.primary_key(table).blank?
      end
      problematic_tables = tables_without_pk.map(&:to_sym)

      expect(problematic_tables).to be_empty
    end
  end

  context 'for CI partitioned table' do
    # Check that each partitionable model with more than 1 column has the partition_id column at the trailing
    # position. Using PARTITIONABLE_MODELS instead of iterating tables since when partitioning existing tables,
    # the routing table only gets created after the PK has already been created, which would be too late for a check.
    skip_tables = %w[]
    partitionable_models = Ci::Partitionable::Testing::PARTITIONABLE_MODELS

    (partitionable_models - skip_tables).each do |klass|
      model = klass.safe_constantize
      table_name = model.table_name
      primary_key_columns = Array(model.connection.primary_key(table_name))

      # Single-column primary keys are not subject to this check.
      next if primary_key_columns.count == 1

      describe table_name do
        it 'expects every PK to have partition_id at trailing position' do
          expect(primary_key_columns).to match([an_instance_of(String), 'partition_id'])
        end
      end
    end
  end
end
2021-09-30 23:02:18 +05:30
context 'index names' do
  it 'disallows index names with a _ccnew[0-9]* suffix' do
    # During REINDEX operations, Postgres generates a temporary index with a _ccnew[0-9]* suffix.
    # Such indexes are considered temporary and are subject to removal if they stick around
    # for longer. See Gitlab::Database::Reindexing.
    #
    # Hence we disallow adding permanent indexes with this suffix.
    temporary_suffix = "#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$"
    problematic_indexes = Gitlab::Database::PostgresIndex.match(temporary_suffix).all

    expect(problematic_indexes).to be_empty
  end
end
2019-02-15 15:39:39 +05:30
private
2020-07-28 23:09:34 +05:30
# Lists the jsonb columns of the `public` schema in the database configured
# for ApplicationRecord, ordered by table name and then column name.
#
# Returns the result of `select_all(...).to_a`; callers in this file read the
# "table_name" and "column_name" keys of each row.
#
# NOTE(review): `_` in the LIKE pattern 'pg_%' is a single-character wildcard,
# so any table name starting with "pg" plus one more character is excluded.
def retrieve_columns_name_with_jsonb
  sql = <<~SQL
    SELECT table_name, column_name, data_type
    FROM information_schema.columns
    WHERE table_catalog = '#{ApplicationRecord.connection_db_config.database}'
    AND table_schema = 'public'
    AND table_name NOT LIKE 'pg_%'
    AND data_type = 'jsonb'
    ORDER BY table_name, column_name, data_type
  SQL

  ApplicationRecord.connection.select_all(sql).to_a
end
# Memoized mapping of table name => list of non-abstract ApplicationRecord
# descendants using that table.
#
# Uses the `abstract_class?` predicate, matching the filter applied to
# `models` in the 'for enums' examples.
def models_by_table_name
  @models_by_table_name ||= ApplicationRecord.descendants.reject(&:abstract_class?).group_by(&:table_name)
end
2022-06-21 17:19:12 +05:30
# Columns of +table+ that are exempt from the foreign key checks.
#
# Looks the table up in IGNORED_FK_COLUMNS first and falls back to REMOVED_FKS,
# which yields the same result as merging IGNORED_FK_COLUMNS over REMOVED_FKS
# without building a merged hash on every call. Returns an empty array when the
# table is listed in neither constant.
def ignored_fk_columns(table)
  IGNORED_FK_COLUMNS.fetch(table) { REMOVED_FKS.fetch(table) { [] } }
end
# Foreign key columns of +table+ listed in IGNORED_INDEXES_ON_FKS, or an empty
# array when the table has no entry.
def ignored_index_columns(table)
  IGNORED_INDEXES_ON_FKS.fetch(table) { [] }
end
2019-12-26 22:10:19 +05:30
# Enum attribute names listed for +model+ (a model class name) in
# IGNORED_LIMIT_ENUMS, or an empty array when the model has no entry.
def ignored_limit_enums(model)
  IGNORED_LIMIT_ENUMS.fetch(model) { [] }
end
2020-07-28 23:09:34 +05:30
# jsonb column names listed for +model+ (a model class name) in
# IGNORED_JSONB_COLUMNS, or an empty array when the model has no entry.
def ignored_jsonb_columns(model)
  IGNORED_JSONB_COLUMNS.fetch(model) { [] }
end
2019-02-15 15:39:39 +05:30
end