# frozen_string_literal: true

require 'spec_helper'

require Rails.root.join('ee', 'spec', 'db', 'schema_support') if Gitlab.ee?

# Structural checks run against the live database schema:
# - every foreign key column is covered by a usable index,
# - every `*_id` column has a (loose) foreign key unless explicitly exempted,
# - enum columns use smallint unless explicitly exempted,
# - jsonb columns are validated by a JSON schema unless explicitly exempted,
# - the expected Postgres schemas exist and no others,
# - every table has a primary key,
# - no permanent index uses the temporary reindexing name suffix.
RSpec.describe 'Database schema', feature_category: :database do
  prepend_mod_with('DB::SchemaSupport')

  let(:tables) { connection.tables }
  let(:columns_name_with_jsonb) { retrieve_columns_name_with_jsonb }

  # Foreign key columns that are exempt from the "are indexed" example.
  # Indifferent access lets the symbol keys below be looked up by whatever
  # table-name value the examples pass in.
  IGNORED_INDEXES_ON_FKS = {
    slack_integrations_scopes: %w[slack_api_scope_id],
    # Will be removed in https://gitlab.com/gitlab-org/gitlab/-/issues/391312
    approval_project_rules: %w[scan_result_policy_id],
    approval_merge_request_rules: %w[scan_result_policy_id]
  }.with_indifferent_access.freeze

  # Tables excluded from the per-table examples.
  TABLE_PARTITIONS = %w[ci_builds_metadata].freeze

  # List of columns historically missing a FK, don't add more columns
  # See: https://docs.gitlab.com/ee/development/database/foreign_keys.html#naming-foreign-keys
  IGNORED_FK_COLUMNS = {
    abuse_reports: %w[reporter_id user_id],
    application_settings: %w[performance_bar_allowed_group_id slack_app_id snowplow_app_id eks_account_id eks_access_key_id],
    approvals: %w[user_id],
    approver_groups: %w[target_id],
    approvers: %w[target_id user_id],
    analytics_cycle_analytics_aggregations: %w[
      last_full_issues_id
      last_full_merge_requests_id
      last_incremental_issues_id
      last_full_run_issues_id
      last_full_run_merge_requests_id
      last_incremental_merge_requests_id
      last_consistency_check_issues_stage_event_hash_id
      last_consistency_check_issues_issuable_id
      last_consistency_check_merge_requests_stage_event_hash_id
      last_consistency_check_merge_requests_issuable_id
    ],
    analytics_cycle_analytics_merge_request_stage_events: %w[author_id group_id merge_request_id milestone_id project_id stage_event_hash_id state_id],
    analytics_cycle_analytics_issue_stage_events: %w[author_id group_id issue_id milestone_id project_id stage_event_hash_id state_id],
    audit_events: %w[author_id entity_id target_id],
    award_emoji: %w[awardable_id user_id],
    aws_roles: %w[role_external_id],
    boards: %w[milestone_id iteration_id],
    chat_names: %w[chat_id team_id user_id integration_id],
    chat_teams: %w[team_id],
    ci_build_needs: %w[partition_id],
    ci_build_pending_states: %w[partition_id build_id],
    ci_build_report_results: %w[partition_id],
    ci_build_trace_chunks: %w[partition_id build_id],
    ci_build_trace_metadata: %w[partition_id],
    ci_builds: %w[erased_by_id trigger_request_id partition_id],
    ci_builds_runner_session: %w[partition_id build_id],
    p_ci_builds_metadata: %w[partition_id],
    ci_job_artifacts: %w[partition_id],
    ci_job_variables: %w[partition_id],
    ci_namespace_monthly_usages: %w[namespace_id],
    ci_pending_builds: %w[partition_id],
    ci_pipeline_variables: %w[partition_id],
    ci_pipelines: %w[partition_id],
    ci_resources: %w[partition_id build_id],
    ci_runner_projects: %w[runner_id],
    ci_running_builds: %w[partition_id],
    ci_sources_pipelines: %w[partition_id source_partition_id],
    ci_stages: %w[partition_id],
    ci_trigger_requests: %w[commit_id],
    ci_unit_test_failures: %w[partition_id build_id],
    cluster_providers_aws: %w[security_group_id vpc_id access_key_id],
    cluster_providers_gcp: %w[gcp_project_id operation_id],
    compliance_management_frameworks: %w[group_id],
    commit_user_mentions: %w[commit_id],
    dep_ci_build_trace_sections: %w[build_id],
    deploy_keys_projects: %w[deploy_key_id],
    deployments: %w[deployable_id user_id],
    draft_notes: %w[discussion_id commit_id],
    epics: %w[updated_by_id last_edited_by_id state_id],
    events: %w[target_id],
    forked_project_links: %w[forked_from_project_id],
    geo_event_log: %w[hashed_storage_attachments_event_id],
    geo_node_statuses: %w[last_event_id cursor_last_event_id],
    geo_nodes: %w[oauth_application_id],
    geo_repository_deleted_events: %w[project_id],
    ghost_user_migrations: %w[initiator_user_id],
    gitlab_subscription_histories: %w[gitlab_subscription_id hosted_plan_id namespace_id],
    identities: %w[user_id],
    import_failures: %w[project_id],
    issues: %w[last_edited_by_id state_id],
    issue_emails: %w[email_message_id],
    jira_tracker_data: %w[jira_issue_transition_id],
    keys: %w[user_id],
    label_links: %w[target_id],
    ldap_group_links: %w[group_id],
    members: %w[source_id created_by_id],
    merge_requests: %w[last_edited_by_id state_id],
    merge_requests_compliance_violations: %w[target_project_id],
    merge_request_diff_commits: %w[commit_author_id committer_id],
    namespaces: %w[owner_id parent_id],
    notes: %w[author_id commit_id noteable_id updated_by_id resolved_by_id confirmed_by_id discussion_id],
    notification_settings: %w[source_id],
    oauth_access_grants: %w[resource_owner_id application_id],
    oauth_access_tokens: %w[resource_owner_id application_id],
    oauth_applications: %w[owner_id],
    product_analytics_events_experimental: %w[event_id txn_id user_id],
    project_build_artifacts_size_refreshes: %w[last_job_artifact_id],
    project_data_transfers: %w[project_id namespace_id],
    project_error_tracking_settings: %w[sentry_project_id],
    project_group_links: %w[group_id],
    project_statistics: %w[namespace_id],
    projects: %w[creator_id ci_id mirror_user_id],
    redirect_routes: %w[source_id],
    repository_languages: %w[programming_language_id],
    routes: %w[source_id],
    sent_notifications: %w[project_id noteable_id recipient_id commit_id in_reply_to_discussion_id],
    slack_integrations: %w[team_id user_id bot_user_id], # these are external Slack IDs
    snippets: %w[author_id],
    spam_logs: %w[user_id],
    status_check_responses: %w[external_approval_rule_id],
    subscriptions: %w[user_id subscribable_id],
    suggestions: %w[commit_id],
    taggings: %w[tag_id taggable_id tagger_id],
    timelogs: %w[user_id],
    todos: %w[target_id commit_id],
    uploads: %w[model_id],
    user_agent_details: %w[subject_id],
    users: %w[color_scheme_id created_by_id theme_id email_opted_in_source_id],
    users_star_projects: %w[user_id],
    vulnerability_identifiers: %w[external_id],
    vulnerability_scanners: %w[external_id],
    security_scans: %w[pipeline_id], # foreign key is not added as ci_pipeline table will be moved into different db soon
    vulnerability_reads: %w[cluster_agent_id],
    # See: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/87584
    # Fixes performance issues with the deletion of web-hooks with many log entries
    web_hook_logs: %w[web_hook_id]
  }.with_indifferent_access.freeze

  context 'for table' do
    Gitlab::Database::EachDatabase.each_database_connection do |connection, _|
      schemas_for_connection = Gitlab::Database.gitlab_schemas_for_connection(connection)

      (connection.tables - TABLE_PARTITIONS).sort.each do |table|
        table_schema = Gitlab::Database::GitlabSchema.table_schema(table)
        # Only check tables whose schema is served by this connection
        next unless schemas_for_connection.include?(table_schema)

        describe table do
          let(:indexes) { connection.indexes(table) }
          let(:columns) { connection.columns(table) }
          let(:foreign_keys) { connection.foreign_keys(table) }
          let(:loose_foreign_keys) { Gitlab::Database::LooseForeignKeys.definitions.group_by(&:from_table).fetch(table, []) }
          let(:all_foreign_keys) { foreign_keys + loose_foreign_keys }
          # take the first column in case we're using a composite primary key
          let(:primary_key_column) { Array(connection.primary_key(table)).first }

          context 'all foreign keys' do
            # for index to be effective, the FK constraint has to be at first place
            it 'are indexed' do
              first_indexed_column = indexes.filter_map do |index|
                columns = index.columns

                # In cases of complex composite indexes, a string is returned eg:
                # "lower((extern_uid)::text), group_id"
                columns = columns.split(',') if columns.is_a?(String)
                column = columns.first.chomp

                # A partial index is not suitable for a foreign key column, unless
                # the only condition is for the presence of the foreign key itself
                column if index.where.nil? || index.where == "(#{column} IS NOT NULL)"
              end

              foreign_keys_columns = all_foreign_keys.map(&:column)
              required_indexed_columns = foreign_keys_columns - ignored_index_columns(table)

              # Add the primary key column to the list of indexed columns because
              # postgres and mysql both automatically create an index on the primary
              # key. Also, the rails connection.indexes() method does not return
              # automatically generated indexes (like the primary key index).
              first_indexed_column.push(primary_key_column)

              expect(first_indexed_column.uniq).to include(*required_indexed_columns)
            end
          end

          context 'columns ending with _id' do
            let(:column_names) { columns.map(&:name) }
            let(:column_names_with_id) { column_names.select { |column_name| column_name.ends_with?('_id') } }
            # we can have FK and loose FK present at the same time, hence `uniq`;
            # foreign keys whose name ends with "_p" are left out of the comparison
            let(:foreign_keys_columns) { all_foreign_keys.reject { |fk| fk.name&.end_with?("_p") }.map(&:column).uniq }
            let(:ignored_columns) { ignored_fk_columns(table) }

            it 'do have the foreign keys' do
              expect(column_names_with_id - ignored_columns).to match_array(foreign_keys_columns)
            end

            it 'and having foreign key are not in the ignore list' do
              # Compare against FK *column names*. `foreign_keys` holds foreign key
              # definition objects, which never equal a column-name string, so
              # subtracting them removed nothing and the expectation could not fail.
              expect(ignored_columns).to match_array(ignored_columns - foreign_keys_columns)
            end
          end
        end
      end
    end
  end

  # These pre-existing enums have limits > 2 bytes
  IGNORED_LIMIT_ENUMS = {
    'Analytics::CycleAnalytics::Stage' => %w[start_event_identifier end_event_identifier],
    'Ci::Bridge' => %w[failure_reason],
    'Ci::Build' => %w[failure_reason],
    'Ci::BuildMetadata' => %w[timeout_source],
    'Ci::BuildTraceChunk' => %w[data_store],
    'Ci::DailyReportResult' => %w[param_type],
    'Ci::JobArtifact' => %w[file_type],
    'Ci::Pipeline' => %w[source config_source failure_reason],
    'Ci::Processable' => %w[failure_reason],
    'Ci::Runner' => %w[access_level],
    'Ci::Stage' => %w[status],
    'Clusters::Applications::Ingress' => %w[ingress_type],
    'Clusters::Cluster' => %w[platform_type provider_type],
    'CommitStatus' => %w[failure_reason],
    'GenericCommitStatus' => %w[failure_reason],
    'Gitlab::DatabaseImporters::CommonMetrics::PrometheusMetric' => %w[group],
    'InternalId' => %w[usage],
    'List' => %w[list_type],
    'NotificationSetting' => %w[level],
    'Project' => %w[auto_cancel_pending_pipelines],
    'ProjectAutoDevops' => %w[deploy_strategy],
    'PrometheusMetric' => %w[group],
    'ResourceLabelEvent' => %w[action],
    'User' => %w[layout dashboard project_view],
    'Users::Callout' => %w[feature_name],
    'PrometheusAlert' => %w[operator]
  }.freeze

  context 'for enums', :eager_load do
    # skip model if it is an abstract class as it would not have an associated DB table
    let(:models) { ApplicationRecord.descendants.reject(&:abstract_class?) }

    it 'uses smallint for enums in all models', :aggregate_failures do
      models.each do |model|
        ignored_enums = ignored_limit_enums(model.name)
        enums = model.defined_enums.keys - ignored_enums

        expect(model).to use_smallint_for_enums(enums)
      end
    end
  end

  # These pre-existing columns do not use a schema validation yet
  IGNORED_JSONB_COLUMNS = {
    "ApplicationSetting" => %w[repository_storages_weighted],
    "AlertManagement::Alert" => %w[payload],
    "Ci::BuildMetadata" => %w[config_options config_variables],
    "Ci::BuildMetadata::Partitioned" => %w[config_options config_variables id_tokens runtime_runner_features secrets],
    "ExperimentSubject" => %w[context],
    "ExperimentUser" => %w[context],
    "Geo::Event" => %w[payload],
    "GeoNodeStatus" => %w[status],
    "Operations::FeatureFlagScope" => %w[strategies],
    "Operations::FeatureFlags::Strategy" => %w[parameters],
    "Packages::Composer::Metadatum" => %w[composer_json],
    "RawUsageData" => %w[payload], # Usage data payload changes often, we cannot use one schema
    "Releases::Evidence" => %w[summary],
    "Vulnerabilities::Finding::Evidence" => %w[data], # Validation work in progress
    "EE::Gitlab::BackgroundMigration::FixSecurityScanStatuses::SecurityScan" => %w[info] # This is a migration model
  }.freeze

  # We are skipping GEO models for now as it adds up complexity
  describe 'for jsonb columns' do
    it 'uses json schema validator', :eager_load do
      columns_name_with_jsonb.each do |hash|
        # Tables without a matching model cannot be checked for validators
        next if models_by_table_name[hash["table_name"]].nil?

        models_by_table_name[hash["table_name"]].each do |model|
          jsonb_columns = [hash["column_name"]] - ignored_jsonb_columns(model.name)

          expect(model).to validate_jsonb_schema(jsonb_columns)
        end
      end
    end
  end

  context 'existence of Postgres schemas' do
    # Names of all schemas in the current database, excluding `pg_*` schemas
    # and `information_schema`.
    def get_schemas
      sql = <<~SQL
        SELECT schema_name FROM
          information_schema.schemata
        WHERE
          NOT schema_name ~* '^pg_' AND NOT schema_name = 'information_schema'
          AND catalog_name = current_database()
      SQL

      ApplicationRecord.connection.select_all(sql).map do |row|
        row['schema_name']
      end
    end

    it 'we have a public schema' do
      expect(get_schemas).to include('public')
    end

    Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
      it "we have a '#{schema}' schema" do
        expect(get_schemas).to include(schema.to_s)
      end
    end

    it 'we do not have unexpected schemas' do
      # `+ 1` accounts for the `public` schema
      expect(get_schemas.size).to eq(Gitlab::Database::EXTRA_SCHEMAS.size + 1)
    end
  end

  context 'primary keys' do
    it 'expects every table to have a primary key defined' do
      Gitlab::Database::EachDatabase.each_database_connection do |connection, _|
        schemas_for_connection = Gitlab::Database.gitlab_schemas_for_connection(connection)

        problematic_tables = connection.tables.select do |table|
          table_schema = Gitlab::Database::GitlabSchema.table_schema(table)
          schemas_for_connection.include?(table_schema) && connection.primary_key(table).blank?
        end.map(&:to_sym)

        expect(problematic_tables).to be_empty
      end
    end
  end

  context 'index names' do
    it 'disallows index names with a _ccnew[0-9]* suffix' do
      # During REINDEX operations, Postgres generates a temporary index with a _ccnew[0-9]* suffix.
      # Such indexes are considered temporary and are subject to removal if they stick around
      # for longer. See Gitlab::Database::Reindexing.
      #
      # Hence we disallow adding permanent indexes with this suffix.
      problematic_indexes = Gitlab::Database::PostgresIndex.match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$").all

      expect(problematic_indexes).to be_empty
    end
  end

  private

  # Returns one row (table_name, column_name, data_type) for every jsonb column
  # in the `public` schema of the database ApplicationRecord is configured for.
  def retrieve_columns_name_with_jsonb
    sql = <<~SQL
        SELECT table_name, column_name, data_type
          FROM information_schema.columns
        WHERE table_catalog = '#{ApplicationRecord.connection_db_config.database}'
          AND table_schema = 'public'
          AND table_name NOT LIKE 'pg_%'
          AND data_type = 'jsonb'
      ORDER BY table_name, column_name, data_type
    SQL

    ApplicationRecord.connection.select_all(sql).to_a
  end

  # Non-abstract ApplicationRecord descendants grouped by their table name
  # (memoized). Several models may share one table.
  def models_by_table_name
    @models_by_table_name ||= ApplicationRecord.descendants.reject(&:abstract_class?).group_by(&:table_name)
  end

  # Columns of `table` exempt from the foreign key requirement ([] if none).
  def ignored_fk_columns(table)
    IGNORED_FK_COLUMNS.fetch(table, [])
  end

  # Foreign key columns of `table` exempt from the index requirement ([] if none).
  def ignored_index_columns(table)
    IGNORED_INDEXES_ON_FKS.fetch(table, [])
  end

  # Enums of the model named `model` exempt from the smallint requirement ([] if none).
  def ignored_limit_enums(model)
    IGNORED_LIMIT_ENUMS.fetch(model, [])
  end

  # jsonb columns of the model named `model` exempt from schema validation ([] if none).
  def ignored_jsonb_columns(model)
    IGNORED_JSONB_COLUMNS.fetch(model, [])
  end
end