# frozen_string_literal: true
#
# debian-mirror-gitlab/lib/object_storage/direct_upload.rb
module ObjectStorage
  #
  # The DirectUpload class generates a set of presigned URLs
  # that can be used to upload data to object storage from untrusted component: Workhorse, Runner?
  #
  # For Google it assumes that the platform supports variable Content-Length.
  #
  # For AWS it initiates Multipart Upload and presignes a set of part uploads.
  # Class calculates the best part size to be able to upload up to asked maximum size.
  # The number of generated parts will never go above 100,
  # but we will always try to reduce amount of generated parts.
  # The part size is rounded-up to 5MB.
  #
  class DirectUpload
    include Gitlab::Utils::StrongMemoize

    # How long the presigned URLs stay valid, and the extra slack added
    # on top of it before the signature itself expires.
    TIMEOUT = 4.hours
    EXPIRE_OFFSET = 15.minutes

    MAXIMUM_MULTIPART_PARTS = 100
    MINIMUM_MULTIPART_SIZE = 5.megabytes

    attr_reader :config, :credentials, :bucket_name, :object_name
    attr_reader :has_length, :maximum_size

    # @param config object storage configuration (provides credentials, bucket, provider flags)
    # @param object_name [String] remote key for the object being uploaded
    # @param has_length [Boolean] whether the final Content-Length is known up front
    # @param maximum_size [Integer, nil] upper bound on upload size; required when has_length is false
    # @raise [ArgumentError] when length is unknown and no maximum_size is given
    def initialize(config, object_name, has_length:, maximum_size: nil)
      raise ArgumentError, 'maximum_size has to be specified if length is unknown' if !has_length && !maximum_size

      @config = config
      @credentials = config.credentials
      @bucket_name = config.bucket
      @object_name = object_name
      @has_length = has_length
      @maximum_size = maximum_size
    end

    # Hash handed to Workhorse describing how to perform the direct upload.
    # nil values are dropped by #compact.
    def to_hash
      {
        Timeout: TIMEOUT,
        GetURL: get_url,
        StoreURL: store_url,
        DeleteURL: delete_url,
        SkipDelete: false,
        MultipartUpload: multipart_upload_hash,
        CustomPutHeaders: true,
        PutHeaders: upload_options
      }.merge(workhorse_client_hash).compact
    end

    # Presigned URLs for an S3-style multipart upload, or nil when
    # multipart is not needed (see #requires_multipart_upload?).
    def multipart_upload_hash
      return unless requires_multipart_upload?

      {
        PartSize: rounded_multipart_part_size,
        PartURLs: multipart_part_urls,
        CompleteURL: multipart_complete_url,
        AbortURL: multipart_abort_url
      }
    end

    # Provider-specific settings for the Workhorse-native object storage client.
    def workhorse_client_hash
      if config.aws?
        workhorse_aws_hash
      elsif config.azure?
        workhorse_azure_hash
      elsif Feature.enabled?(:workhorse_google_client) && config.google?
        workhorse_google_hash
      else
        {}
      end
    end

    def workhorse_aws_hash
      {
        UseWorkhorseClient: use_workhorse_s3_client?,
        RemoteTempObjectID: object_name,
        ObjectStorage: {
          Provider: 'AWS',
          S3Config: {
            Bucket: bucket_name,
            Region: credentials[:region] || ::Fog::AWS::Storage::DEFAULT_REGION,
            Endpoint: credentials[:endpoint],
            PathStyle: config.use_path_style?,
            UseIamProfile: config.use_iam_profile?,
            ServerSideEncryption: config.server_side_encryption,
            SSEKMSKeyID: config.server_side_encryption_kms_key_id
          }.compact
        }
      }
    end

    def workhorse_azure_hash
      {
        # Azure requires Workhorse client because direct uploads can't
        # use pre-signed URLs without buffering the whole file to disk.
        UseWorkhorseClient: true,
        RemoteTempObjectID: object_name,
        ObjectStorage: {
          Provider: 'AzureRM',
          GoCloudConfig: {
            URL: azure_gocloud_url
          }
        }
      }
    end

    # Go Cloud blob URL for Azure, e.g. "azblob://bucket?domain=...".
    def azure_gocloud_url
      url = "azblob://#{bucket_name}"
      url += "?domain=#{config.azure_storage_domain}" if config.azure_storage_domain.present?
      url
    end

    def workhorse_google_hash
      {
        UseWorkhorseClient: use_workhorse_google_client?,
        RemoteTempObjectID: object_name,
        ObjectStorage: {
          Provider: 'Google',
          GoCloudConfig: {
            URL: google_gocloud_url
          }
        }
      }
    end

    def google_gocloud_url
      "gs://#{bucket_name}"
    end

    def use_workhorse_s3_client?
      return false unless config.use_iam_profile? || config.consolidated_settings?
      # The Golang AWS SDK does not support V2 signatures
      return false unless credentials.fetch(:aws_signature_version, 4).to_i >= 4

      true
    end

    # The Workhorse Google client needs consolidated settings plus one of
    # the supported credential sources.
    def use_workhorse_google_client?
      return false unless config.consolidated_settings?
      return true if credentials[:google_application_default]
      return true if credentials[:google_json_key_location]
      return true if credentials[:google_json_key_string]

      false
    end

    def provider
      credentials[:provider].to_s
    end

    # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectGET.html
    def get_url
      if config.google?
        connection.get_object_https_url(bucket_name, object_name, expire_at)
      else
        connection.get_object_url(bucket_name, object_name, expire_at)
      end
    end

    # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectDELETE.html
    def delete_url
      connection.delete_object_url(bucket_name, object_name, expire_at)
    end

    # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html
    def store_url
      connection.put_object_url(bucket_name, object_name, expire_at, upload_options)
    end

    # One presigned URL per part; part numbers are 1-based.
    def multipart_part_urls
      Array.new(number_of_multipart_parts) do |part_index|
        multipart_part_upload_url(part_index + 1)
      end
    end

    # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html
    def multipart_part_upload_url(part_number)
      connection.signed_url({
        method: 'PUT',
        bucket_name: bucket_name,
        object_name: object_name,
        query: { 'uploadId' => upload_id, 'partNumber' => part_number },
        headers: upload_options
      }, expire_at)
    end

    # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html
    def multipart_complete_url
      connection.signed_url({
        method: 'POST',
        bucket_name: bucket_name,
        object_name: object_name,
        query: { 'uploadId' => upload_id },
        headers: { 'Content-Type' => 'application/xml' }
      }, expire_at)
    end

    # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadAbort.html
    def multipart_abort_url
      connection.signed_url({
        method: 'DELETE',
        bucket_name: bucket_name,
        object_name: object_name,
        query: { 'uploadId' => upload_id }
      }, expire_at)
    end

    private

    def rounded_multipart_part_size
      # round multipart_part_size up to minimum_multipart_size
      (multipart_part_size + MINIMUM_MULTIPART_SIZE - 1) / MINIMUM_MULTIPART_SIZE * MINIMUM_MULTIPART_SIZE
    end

    def multipart_part_size
      return MINIMUM_MULTIPART_SIZE if maximum_size == 0

      maximum_size / number_of_multipart_parts
    end

    def number_of_multipart_parts
      # If we don't have max length, we can only assume the file is as large as possible.
      return MAXIMUM_MULTIPART_PARTS if maximum_size == 0

      [
        # round maximum_size up to minimum_multipart_size
        (maximum_size + MINIMUM_MULTIPART_SIZE - 1) / MINIMUM_MULTIPART_SIZE,
        MAXIMUM_MULTIPART_PARTS
      ].min
    end

    # Multipart is only used for AWS uploads of unknown length that are not
    # handled by the Workhorse-native S3 client.
    def requires_multipart_upload?
      return false unless config.aws?
      return false if use_workhorse_s3_client?

      !has_length
    end

    # Lazily initiates the multipart upload and memoizes its UploadId.
    def upload_id
      return unless requires_multipart_upload?

      strong_memoize(:upload_id) do
        new_upload = connection.initiate_multipart_upload(bucket_name, object_name, config.fog_attributes)
        new_upload.body["UploadId"]
      end
    end

    def expire_at
      strong_memoize(:expire_at) do
        Time.now + TIMEOUT + EXPIRE_OFFSET
      end
    end

    def upload_options
      {}
    end

    def connection
      @connection ||= ::Fog::Storage.new(credentials)
    end
  end
end