2020-05-24 23:13:21 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# rubocop:disable Migration/PreventStrings
|
|
|
|
|
|
|
|
# This migration cleans up Projects that were orphaned when their namespace was deleted
|
|
|
|
# Instead of deleting them, we:
|
|
|
|
# - Find (or create) the Ghost User
|
|
|
|
# - Create (if it does not already exist) a `lost-and-found` group owned by the Ghost User
|
|
|
|
# - Find orphaned projects --> namespace_id cannot be found in namespaces
|
|
|
|
# - Move the orphaned projects to the `lost-and-found` group
|
|
|
|
# (while making them private and setting `archived=true`)
|
|
|
|
#
|
|
|
|
# On GitLab.com (2020-05-11) this migration will update 66 orphaned projects
|
|
|
|
class CleanupProjectsWithMissingNamespace < ActiveRecord::Migration[6.0]
|
|
|
|
include Gitlab::Database::MigrationHelpers
|
|
|
|
|
|
|
|
DOWNTIME = false
|
|
|
|
VISIBILITY_PRIVATE = 0
|
|
|
|
ACCESS_LEVEL_OWNER = 50
|
|
|
|
|
|
|
|
# The batch size of projects to check in each iteration
|
|
|
|
# We expect the selectivity for orphaned projects to be very low:
|
|
|
|
# (66 orphaned projects out of a total 13.6M)
|
|
|
|
# so 10K should be a safe choice
|
|
|
|
BATCH_SIZE = 10000
|
|
|
|
|
|
|
|
disable_ddl_transaction!
|
|
|
|
|
|
|
|
# Minimal ActiveRecord model mapped to the `user_details` table.
# It is referenced by the migration-local User model, which stores the
# user's bio on it.
class UserDetail < ActiveRecord::Base
  self.table_name = 'user_details'

  # Owning user, resolved to the migration-local User class.
  belongs_to :user, class_name: 'CleanupProjectsWithMissingNamespace::User'
end
|
|
|
|
|
|
|
|
# Minimal ActiveRecord model mapped to the `users` table.
#
# It declares only the defaults, associations and callbacks that this
# migration relies on to find (or create) the ghost user and the
# `lost-and-found` group owned by that user.
class User < ActiveRecord::Base
  self.table_name = 'users'

  # Base name of the group that receives the orphaned projects
  LOST_AND_FOUND_GROUP = 'lost-and-found'
  # Value stored in users.user_type for the ghost user
  USER_TYPE_GHOST = 5
  # projects_limit assigned to a newly created ghost user
  DEFAULT_PROJECTS_LIMIT = 100_000

  default_value_for :admin, false
  default_value_for :can_create_group, true # we need this to create the group
  default_value_for :can_create_team, false
  default_value_for :project_view, :files
  default_value_for :notified_of_own_activity, false
  default_value_for :preferred_language, I18n.default_locale

  has_one :user_detail, class_name: 'CleanupProjectsWithMissingNamespace::UserDetail'

  # The namespace owned by this user; only rows with a NULL `type` qualify.
  has_one :namespace,
          -> { where(type: nil) },
          foreign_key: :owner_id,
          inverse_of: :owner,
          autosave: true,
          class_name: 'CleanupProjectsWithMissingNamespace::Namespace'

  before_save :ensure_namespace_correct
  before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?

  enum project_view: { readme: 0, activity: 1, files: 2 }

  # before_save callback.
  #
  # Builds the user's namespace from the username and name when there is
  # none; otherwise copies a changed username / name onto the existing
  # namespace's path / name.
  def ensure_namespace_correct
    return build_namespace(path: username, name: name) unless namespace

    namespace.path = username if username_changed?
    namespace.name = name if name_changed?
  end

  # before_save callback, run only when `bio` changed.
  #
  # Copies the bio, cut to at most 255 characters, onto the user_detail
  # record. Does nothing when the :migrate_bio_to_user_details feature flag
  # is disabled.
  def ensure_bio_is_assigned_to_user_details
    return if Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true)

    user_detail.bio = bio.to_s[0, 255]
  end

  # Returns the associated user_detail, building a new (unsaved) one when
  # the association is empty.
  def user_detail
    super.presence || build_user_detail
  end

  # Return (or create if necessary) the `lost-and-found` group
  def lost_and_found_group
    found = existing_lost_and_found_group
    return found if found

    Group.create_unique_group(self, LOST_AND_FOUND_GROUP)
  end

  # Looks up a group whose name starts with LOST_AND_FOUND_GROUP and in which
  # this user has a GroupMember row with owner access level and a NULL
  # `requested_at`. Returns nil when no such group exists.
  def existing_lost_and_found_group
    # There should only be one Group for User Ghost starting with LOST_AND_FOUND_GROUP
    owned_groups =
      Group
        .joins('INNER JOIN members ON namespaces.id = members.source_id')
        .where('namespaces.type = ?', 'Group')
        .where('members.type = ?', 'GroupMember')
        .where('members.source_type = ?', 'Namespace')
        .where('members.user_id = ?', id)
        .where('members.requested_at IS NULL')
        .where('members.access_level = ?', ACCESS_LEVEL_OWNER)

    name_prefix_match = Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%")

    owned_groups.find_by(name_prefix_match)
  end

  class << self
    # Return (or create if necessary) the ghost user
    def ghost
      ghost_scope = where(user_type: USER_TYPE_GHOST)

      unique_internal(ghost_scope, 'ghost', 'ghost%s@example.com') do |user|
        user.bio = _('This is a "Ghost User", created to hold all issues authored by users that have since been deleted. This user cannot be removed.')
        user.name = 'Ghost User'
      end
    end

    # Returns the first user in `scope`, creating one through
    # create_unique_internal when the scope is empty. The block is passed on
    # to the creation step.
    def unique_internal(scope, username, email_pattern, &block)
      scope.first || create_unique_internal(scope, username, email_pattern, &block)
    end

    # Creates the internal user for `scope` while holding an exclusive lease.
    #
    # scope         - relation that identifies the internal user
    # username      - preferred username; made unique before use
    # email_pattern - sprintf pattern used to derive a unique email
    # creation_block - passed to User.create!
    #
    # Returns the existing user if one appeared while waiting for the lease,
    # otherwise the newly created user. The lease is cancelled on the way out,
    # whether or not creation succeeded.
    def create_unique_internal(scope, username, email_pattern, &creation_block)
      # Since we only want a single one of these in an instance, we use an
      # exclusive lease to ensure that this block is never run concurrently.
      lease_key = "user:unique_internal:#{username}"
      lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute.to_i)

      # Keep trying until we obtain the lease. To prevent hammering Redis too
      # much we'll wait for a bit between retries.
      sleep(1) until (uuid = lease.try_obtain)

      # Recheck if the user is already present. One might have been
      # added between the time we last checked (first line of this method)
      # and the time we acquired the lock.
      existing_user = uncached { scope.first }
      return existing_user if existing_user.present?

      uniquify = Uniquify.new
      email_for = -> (n) { Kernel.sprintf(email_pattern, n) }

      username = uniquify.string(username) { |candidate| User.find_by_username(candidate) }
      email = uniquify.string(email_for) { |candidate| User.find_by_email(candidate) }

      attributes = {
        username: username,
        email: email,
        user_type: USER_TYPE_GHOST,
        projects_limit: DEFAULT_PROJECTS_LIMIT,
        state: :active
      }

      User.create!(attributes, &creation_block)
    ensure
      Gitlab::ExclusiveLease.cancel(lease_key, uuid)
    end
  end
end
|
|
|
|
|
|
|
|
# Minimal ActiveRecord model mapped to the `namespaces` table.
# Serves as the parent class of the migration-local Group model.
class Namespace < ActiveRecord::Base
  self.table_name = 'namespaces'

  # Owning user, resolved to the migration-local User class.
  belongs_to :owner, class_name: 'CleanupProjectsWithMissingNamespace::User'
end
|
|
|
|
|
|
|
|
# Namespace rows that represent groups.
class Group < Namespace
  # Disable STI to allow us to manually set "type = 'Group'"
  # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::Group"
  self.inheritance_column = :_type_disabled

  # Creates a private top-level group to hold orphaned projects and adds
  # `user` to it with owner access.
  #
  # user       - the user who becomes owner of the new group
  # group_name - preferred name; a unique variant is derived from it
  #
  # Returns the created Group. Raises if either record fails to save
  # (both are created with create!).
  def self.create_unique_group(user, group_name)
    # 'lost-and-found' may be already defined, find a unique one
    # NOTE(review): only `name` is checked for collisions, yet the same value
    # is also stored as `path` - confirm a path clash cannot occur.
    unique_name = Uniquify.new.string(group_name) do |candidate|
      Group.where(parent_id: nil, name: candidate).exists?
    end

    attributes = {
      name: unique_name,
      path: unique_name,
      type: 'Group',
      description: 'Group to store orphaned projects',
      visibility_level: VISIBILITY_PRIVATE
    }

    # No need to create a route for the lost-and-found group
    Group.create!(attributes).tap do |created_group|
      GroupMember.add_user(created_group, user, ACCESS_LEVEL_OWNER)
    end
  end
end
|
|
|
|
|
|
|
|
# Minimal ActiveRecord model mapped to the `members` table.
# Serves as the parent class of the migration-local GroupMember model.
class Member < ActiveRecord::Base
  self.table_name = 'members'
end
|
|
|
|
|
|
|
|
# Member rows that link a user to a group.
class GroupMember < Member
  # Value written to members.notification_level for new memberships
  NOTIFICATION_SETTING_GLOBAL = 3

  # Disable STI to allow us to manually set "type = 'GroupMember'"
  # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::GroupMember"
  self.inheritance_column = :_type_disabled

  # Inserts a membership row that attaches `user` to `source`.
  #
  # source       - record whose id is stored as source_id (a group here)
  # user         - record whose id is stored as user_id
  # access_level - integer access level for the membership
  #
  # Returns the created GroupMember; raises if the record cannot be saved.
  def self.add_user(source, user, access_level)
    membership = {
      type: 'GroupMember',
      source_id: source.id,
      user_id: user.id,
      source_type: 'Namespace',
      access_level: access_level,
      notification_level: NOTIFICATION_SETTING_GLOBAL
    }

    GroupMember.create!(membership)
  end
end
|
|
|
|
|
|
|
|
# Minimal ActiveRecord model mapped to the `projects` table, with batching
# support from EachBatch.
class Project < ActiveRecord::Base
  self.table_name = 'projects'

  include ::EachBatch

  # SQL condition that holds for projects whose namespace_id matches no row
  # in `namespaces`.
  MISSING_NAMESPACE_CONDITION =
    'NOT EXISTS (
      SELECT 1
      FROM namespaces
      WHERE projects.namespace_id = namespaces.id
    )'

  # Restricts the current relation to orphaned projects, i.e. those for
  # which no namespace row with the referenced id exists.
  def self.without_namespace
    where(MISSING_NAMESPACE_CONDITION)
  end
end
|
|
|
|
|
|
|
|
# Moves every orphaned project (one whose namespace_id matches no row in
# `namespaces`) into the ghost user's `lost-and-found` group.
#
# Each moved project is archived, made private, and gets its id appended to
# its name and path. The ghost user and the group are looked up first and
# created when missing.
def up
  # Reset the column information of all the models that update the database
  # to ensure the Active Record's knowledge of the table structure is current.
  # UserDetail is part of the list because creating the ghost user sets its
  # bio, which the User callbacks copy onto a user_detail record.
  [User, UserDetail, Namespace, Member, Project].each(&:reset_column_information)

  # Find or Create the ghost user
  ghost_user = User.ghost

  # Find or Create the `lost-and-found`
  lost_and_found = ghost_user.lost_and_found_group

  # With BATCH_SIZE=10K and projects.count=13.6M
  # ~1360 iterations will be run:
  # - each requires on average ~160ms for relation.without_namespace
  # - worst case scenario is that 66 of those batches will trigger an update (~200ms each)
  # In general, we expect less than 5% (=66/13.6M x 10K) to trigger an update
  # Expected total run time: ~235 seconds (== 220 seconds + 14 seconds)
  Project.each_batch(of: BATCH_SIZE) do |relation|
    relation.without_namespace.update_all <<~SQL
      namespace_id = #{lost_and_found.id},
      archived = TRUE,
      visibility_level = #{VISIBILITY_PRIVATE},

      -- Names are expected to be unique inside their namespace
      -- (uniqueness validation on namespace_id, name)
      -- Attach the id to the name and path to make sure that they are unique
      name = name || '_' || id::text,
      path = path || '_' || id::text
    SQL
  end
end
|
|
|
|
|
|
|
|
# Intentionally does nothing: this migration cannot be reversed.
def down
  # no-op: the projects were in an inconsistent state before the migration,
  # and the original namespace_id of each project is lost during the update,
  # so there is nothing meaningful to restore.
end
|
|
|
|
end
|
|
|
|
# rubocop:enable Migration/PreventStrings
|