2018-11-18 11:00:15 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
|
|
|
|
module Cleanup
|
|
|
|
class ProjectUploads
|
|
|
|
# Directory, located under the project uploads root, that
# `move_to_lost_and_found` substitutes for the uploads root when relocating
# a file. `File.join` returns a fresh (unfrozen) String even with the
# frozen-string-literal magic comment, so freeze it explicitly to keep the
# constant immutable.
LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found').freeze

# Logger used for progress (`info`), batch (`debug`) and failure (`error`)
# messages emitted while cleaning up.
attr_reader :logger
|
|
|
|
|
|
|
|
# Builds a cleaner that reports through +logger+.
#
# @param logger [#info, #debug, #error, nil] receiver of the messages this
#   class emits; when nil (or false) Gitlab::AppLogger is used instead
def initialize(logger: nil)
  @logger = logger
  @logger ||= Gitlab::AppLogger
end
|
|
|
|
|
|
|
|
# Walks every orphaned project upload file, hands it to `cleanup` and logs
# the message `cleanup` returns at info level.
#
# @param dry_run [Boolean] forwarded to `cleanup`; when truthy the opening
#   log line is suffixed with ". Dry run"
# @return whatever `each_orphan_file` returns
def run!(dry_run: true)
  mode_note = dry_run ? '. Dry run' : ''
  logger.info "Looking for orphaned project uploads to clean up#{mode_note}..."

  each_orphan_file do |file_path, relative_upload_path|
    logger.info cleanup(file_path, relative_upload_path, dry_run)
  end
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Decides what to do with one orphaned file and returns a message
# describing the outcome.
#
# @param path [String] path of the orphaned file on disk
# @param upload_path [String, nil] relative upload path handed to
#   `find_correct_path`; when nil/false no lookup is attempted
# @param dry_run [Boolean] forwarded to the move helpers
# @return the "Cannot find file" message, or the result of `move` /
#   `move_to_lost_and_found`
def cleanup(path, upload_path, dry_run)
  # Seen in staging: `find` returned a path on which `File.delete` raised
  # `Errno::ENOENT`, so re-check that the file is still there.
  return "Cannot find file: #{path}" unless File.exist?(path)

  destination = find_correct_path(upload_path) if upload_path
  return move_to_lost_and_found(path, dry_run) unless destination

  move(path, destination, 'fix', dry_run)
end
|
|
|
|
|
|
|
|
# Looks up where the file for +upload_path+ is supposed to live.
#
# Accepts a path in the form of "#{hex_secret}/#{filename}".
#
# @param upload_path [String] relative upload path used to find the
#   FileUploader `Upload` record
# @return the record's `absolute_path`, or nil when there is no record, it
#   is not local, it has no model, or computing the path raised
# rubocop: disable CodeReuse/ActiveRecord
def find_correct_path(upload_path)
  record = Upload.find_by(uploader: 'FileUploader', path: upload_path)
  usable = record&.local? && record.model

  usable ? record.absolute_path : nil
rescue StandardError => error
  logger.error error.message

  # absolute_path depends on a lot of code. If it doesn't work, then it
  # doesn't matter whether the upload file is in the right place: treat it
  # as uncorrectable. E.g. the project record might be missing, which
  # raises an exception.
  nil
end
# rubocop: enable CodeReuse/ActiveRecord
|
2018-11-18 11:00:15 +05:30
|
|
|
|
|
|
|
# Relocates +path+ into the lost-and-found tree by swapping the leading
# uploads directory for LOST_AND_FOUND, keeping the rest of the path.
#
# @param path [String] path of the orphaned file
# @param dry_run [Boolean] forwarded to `move`
# @return the result of `move`
def move_to_lost_and_found(path, dry_run)
  upload_dir = ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR.to_s

  # Escape the directory so characters such as "." or "+" in the configured
  # path are matched literally, and use the block form of `sub` so the
  # replacement is inserted verbatim (no backreference expansion).
  new_path = path.sub(/\A#{Regexp.escape(upload_dir)}/) { LOST_AND_FOUND }

  move(path, new_path, 'move to lost and found', dry_run)
end
|
|
|
|
|
|
|
|
# Moves +path+ to +new_path+ (creating the destination directory first) or,
# on a dry run, only reports what would be done.
#
# @param path [String] current location of the file
# @param new_path [String] destination of the file
# @param prefix [String] short label for the action, used in the message
# @param dry_run [Boolean] when truthy nothing on disk is touched
# @return [String] "Can ...", "Did ..." or "Error during ..." message; a
#   StandardError raised while moving is reported in the message, not raised
def move(path, new_path, prefix, dry_run)
  description = "#{prefix} #{path} -> #{new_path}"
  return "Can #{description}" if dry_run

  begin
    FileUtils.mkdir_p(File.dirname(new_path))
    FileUtils.mv(path, new_path)

    "Did #{description}"
  rescue StandardError => error
    "Error during #{description}: #{error.inspect}"
  end
end
|
|
|
|
|
|
|
|
# Yields each project upload file that `ProjectUploadPath` reports as an
# orphan, i.e. that is not in the uploads table.
#
# Files are read in batches from `ProjectUploadFileFinder`; every batch is
# announced at debug level before it is inspected.
#
# @yieldparam path [String] file path as returned by the finder
# @yieldparam upload_path [String, nil] upload path parsed from +path+
# @return whatever `each_file_batch` returns
def each_orphan_file
  finder = ProjectUploadFileFinder.new

  finder.each_file_batch do |batch|
    logger.debug "Processing batch of #{batch.size} project upload file paths, starting with #{batch.first}"

    batch.each do |file_path|
      parsed = ProjectUploadPath.from_path(file_path)
      next unless parsed.orphan?

      yield(file_path, parsed.upload_path)
    end
  end
end
|
|
|
|
|
|
|
|
# Splits the path of a file under the FileUploader root into the project's
# full path and the upload path, and answers whether a matching row exists
# in the uploads table.
class ProjectUploadPath
  # Matches "<FileUploader.root>/<project full path>/<hex secret>/<filename>".
  # Capture 1 is the project full path, capture 2 is "<hex secret>/<filename>".
  # The root is escaped so that regexp metacharacters in the configured
  # directory (e.g. "." or "+") are matched literally.
  PROJECT_FULL_PATH_REGEX = %r{\A#{Regexp.escape(FileUploader.root.to_s)}/(.+)/(\h+/[^/]+)\z}.freeze

  attr_reader :full_path, :upload_path

  # @param full_path [String, nil] project full path parsed from the file path
  # @param upload_path [String, nil] "<hex secret>/<filename>" part of the path
  def initialize(full_path, upload_path)
    @full_path = full_path
    @upload_path = upload_path
  end

  # Parses +path+ with PROJECT_FULL_PATH_REGEX.
  #
  # @param path [String] path of a file found on disk
  # @return [ProjectUploadPath] with both attributes nil when +path+ does not
  #   match the expected layout
  def self.from_path(path)
    path_matched = path.match(PROJECT_FULL_PATH_REGEX)
    return new(nil, nil) unless path_matched

    new(path_matched[1], path_matched[2])
  end

  # @return [Boolean] true when the path could not be parsed, or when
  #   `Upload.exists?` finds no FileUploader row with this upload path for
  #   the looked-up project
  # rubocop: disable CodeReuse/ActiveRecord
  def orphan?
    return true if full_path.nil? || upload_path.nil?

    # It's possible to reduce to one query, but `where_full_path_in` is complex
    !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
  end
  # rubocop: enable CodeReuse/ActiveRecord

  private

  # Memoized result of plucking :id for projects matching +full_path+.
  # NOTE(review): `pluck` presumably returns an Array of ids rather than a
  # single id, despite the method name; confirm before reusing elsewhere.
  # rubocop: disable CodeReuse/ActiveRecord
  def project_id
    @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
  end
  # rubocop: enable CodeReuse/ActiveRecord
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|