debian-mirror-gitlab/lib/gitlab/cleanup/project_uploads.rb

132 lines
4 KiB
Ruby
Raw Permalink Normal View History

2018-11-18 11:00:15 +05:30
# frozen_string_literal: true
module Gitlab
module Cleanup
class ProjectUploads
# Directory "<ABSOLUTE_UPLOAD_DIR>/-/project-lost-found"; orphaned files that
# cannot be matched to an upload record are moved underneath it.
LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')
# Logger that receives the progress, result and error messages of a run.
attr_reader :logger
# @param logger [#info, #debug, #error, nil] receiver of all log output;
#   Gitlab::AppLogger is used when none (nil/false) is given.
def initialize(logger: nil)
  @logger = logger || Gitlab::AppLogger
end
# Processes every orphaned project upload file and logs one result line per
# file.
#
# @param dry_run [Boolean] forwarded to #cleanup; also appends ". Dry run"
#   to the opening log line when truthy
def run!(dry_run: true)
  dry_run_note = dry_run ? '. Dry run' : nil
  logger.info "Looking for orphaned project uploads to clean up#{dry_run_note}..."

  each_orphan_file do |orphan_path, upload_path|
    logger.info cleanup(orphan_path, upload_path, dry_run)
  end
end
private
# Decides what to do with a single orphaned file and returns the resulting
# message.
#
# @param path [String] absolute path of the orphaned file
# @param upload_path [String, nil] "#{hex_secret}/#{filename}" part, if known
# @param dry_run [Boolean] passed through to the move helpers
def cleanup(path, upload_path, dry_run)
  # Seen in staging: `find` returned a path on which `File.delete` raised
  # `Errno::ENOENT`, so verify the file is still there first.
  return "Cannot find file: #{path}" unless File.exist?(path)

  destination = find_correct_path(upload_path) if upload_path
  return move_to_lost_and_found(path, dry_run) unless destination

  move(path, destination, 'fix', dry_run)
end
# Looks up the location recorded in the uploads table for a file.
#
# Accepts a path in the form of "#{hex_secret}/#{filename}"
#
# @param upload_path [String]
# @return [String, nil] the upload's absolute path, or nil when there is no
#   local FileUploader upload with a model for this path, or when resolving
#   it raised an error (the error message is logged)
# rubocop: disable CodeReuse/ActiveRecord
def find_correct_path(upload_path)
  upload = Upload.find_by(uploader: 'FileUploader', path: upload_path)
  return unless upload&.local? && upload.model

  upload.absolute_path
rescue StandardError => e
  logger.error e.message

  # absolute_path depends on a lot of code. If it doesn't work, then it
  # doesn't matter if the upload file is in the right place. Treat it
  # as uncorrectable.
  # I.e. the project record might be missing, which raises an exception.
  nil
end
# rubocop: enable CodeReuse/ActiveRecord
2018-11-18 11:00:15 +05:30
# Moves a file that has no usable upload record into the lost-and-found
# directory, keeping its path relative to the upload directory.
#
# @param path [String] absolute path of the orphaned file
# @param dry_run [Boolean] passed through to #move
# @return [String] the message produced by #move
def move_to_lost_and_found(path, dry_run)
  upload_dir = ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR.to_s

  # The directory is escaped so regexp metacharacters in it (".", "+", ...)
  # match literally, and the block form keeps any backslashes in
  # LOST_AND_FOUND from being read as backreferences.
  new_path = path.sub(/\A#{Regexp.escape(upload_dir)}/) { LOST_AND_FOUND }

  move(path, new_path, 'move to lost and found', dry_run)
end
# Moves +path+ to +new_path+, or only describes the move on a dry run.
#
# @param path [String] current location of the file
# @param new_path [String] destination; missing parent directories are created
# @param prefix [String] short name of the action, used in the message
# @param dry_run [Boolean] when truthy nothing on disk is touched
# @return [String] "Can <action>" (dry run), "Did <action>" (success) or
#   "Error during <action>: <exception>" (failure)
def move(path, new_path, prefix, dry_run)
  action = "#{prefix} #{path} -> #{new_path}"
  return "Can #{action}" if dry_run

  FileUtils.mkdir_p(File.dirname(new_path))
  FileUtils.mv(path, new_path)
  "Did #{action}"
rescue StandardError => e
  # Failures are handed back to the caller as a message instead of raised.
  "Error during #{action}: #{e.inspect}"
end
# Walks all project upload files in batches and yields, for every file
# without a matching row in the uploads table, its absolute path together
# with its upload path.
def each_orphan_file
  finder = ProjectUploadFileFinder.new

  finder.each_file_batch do |batch|
    logger.debug "Processing batch of #{batch.size} project upload file paths, starting with #{batch.first}"

    batch.each do |file_path|
      candidate = ProjectUploadPath.from_path(file_path)
      next unless candidate.orphan?

      yield(file_path, candidate.upload_path)
    end
  end
end
# Splits the absolute path of a project upload file into the part between the
# uploads root and the secret directory (+full_path+) and the
# "#{hex_secret}/#{filename}" part (+upload_path+), and checks whether a
# matching upload record exists.
class ProjectUploadPath
  # Capture 1: everything between the uploads root and the secret directory.
  # Capture 2: "<hex secret>/<filename>".
  # The root is escaped so regexp metacharacters in the configured directory
  # are matched literally.
  PROJECT_FULL_PATH_REGEX = %r{\A#{Regexp.escape(FileUploader.root.to_s)}/(.+)/(\h+/[^/]+)\z}.freeze

  attr_reader :full_path, :upload_path

  # @param full_path [String, nil]
  # @param upload_path [String, nil]
  def initialize(full_path, upload_path)
    @full_path = full_path
    @upload_path = upload_path
  end

  # Builds an instance from an absolute file path. A path that does not match
  # PROJECT_FULL_PATH_REGEX yields an instance with both parts nil, which
  # #orphan? reports as orphaned.
  def self.from_path(path)
    path_matched = path.match(PROJECT_FULL_PATH_REGEX)
    return new(nil, nil) unless path_matched

    new(path_matched[1], path_matched[2])
  end

  # rubocop: disable CodeReuse/ActiveRecord
  # @return [Boolean] true when the path could not be parsed or no Project
  #   FileUploader upload exists for it
  def orphan?
    return true if full_path.nil? || upload_path.nil?

    # It's possible to reduce to one query, but `where_full_path_in` is complex
    !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
  end
  # rubocop: enable CodeReuse/ActiveRecord

  private

  # rubocop: disable CodeReuse/ActiveRecord
  # Memoized result of plucking :id for the projects matching +full_path+.
  def project_id
    @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
  end
  # rubocop: enable CodeReuse/ActiveRecord
end
end
end
end