debian-mirror-gitlab/workhorse/internal/upload/destination/destination.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

247 lines
7.3 KiB
Go
Raw Normal View History

2022-05-07 20:08:51 +05:30
// Package destination handles uploading to a specific destination (delegating
// to the filestore or objectstore packages) based on options from the
// pre-authorization API, and finalizing the upload.
package destination
2021-02-22 17:27:13 +05:30
import (
"context"
"errors"
"fmt"
"io"
"os"
"strconv"
"time"
2023-07-09 08:55:56 +05:30
"github.com/golang-jwt/jwt/v5"
2021-02-22 17:27:13 +05:30
"gitlab.com/gitlab-org/labkit/log"
2021-10-27 15:23:28 +05:30
"gitlab.com/gitlab-org/gitlab/workhorse/internal/secret"
2022-05-07 20:08:51 +05:30
"gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination/filestore"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination/objectstore"
2021-02-22 17:27:13 +05:30
)
// SizeError is the error type returned by Upload when the declared upload size
// is rejected: it either exceeds the configured maximum or does not match the
// number of bytes actually persisted.
type SizeError error
// ErrEntityTooLarge means that the uploaded content is bigger than the maximum allowed size.
var ErrEntityTooLarge = errors.New("entity is too large")
// FileHandler represents a file that has been processed for upload.
// It may have been uploaded to an object store and/or saved to a local path.
type FileHandler struct {
	// LocalPath is the path on disk where the file has been stored.
	LocalPath string

	// RemoteID is the objectID provided by GitLab Rails.
	RemoteID string

	// RemoteURL is the ObjectStore URL provided by GitLab Rails.
	RemoteURL string

	// Size is the persisted file size.
	Size int64

	// Name is the resource name to send back to GitLab Rails.
	// It differs from the real file name in order to avoid file collisions.
	Name string

	// hashes maps a hash name (for example "md5" or "sha256") to the
	// corresponding digest of the handled file.
	hashes map[string]string

	// uploadDuration is the duration of the upload in seconds.
	uploadDuration float64
}
type uploadClaims struct {
Upload map[string]string `json:"upload"`
2022-07-23 23:45:48 +05:30
jwt.RegisteredClaims
2021-02-22 17:27:13 +05:30
}
// SHA256 returns the SHA-256 digest recorded for the handled file, or the
// empty string when no such digest has been recorded.
func (fh *FileHandler) SHA256() string {
	digest := fh.hashes["sha256"]
	return digest
}
// MD5 returns the MD5 digest recorded for the handled file, or the empty
// string when no such digest has been recorded.
func (fh *FileHandler) MD5() string {
	digest := fh.hashes["md5"]
	return digest
}
// GitLabFinalizeFields returns a map with all the fields GitLab Rails needs in order to finalize the upload.
func (fh *FileHandler) GitLabFinalizeFields(prefix string) (map[string]string, error) {
2021-10-27 15:23:28 +05:30
// TODO: remove `data` these once rails fully and exclusively support `signedData` (https://gitlab.com/gitlab-org/gitlab/-/issues/324873)
2021-02-22 17:27:13 +05:30
data := make(map[string]string)
signedData := make(map[string]string)
key := func(field string) string {
if prefix == "" {
return field
}
return fmt.Sprintf("%s.%s", prefix, field)
}
for k, v := range map[string]string{
2021-12-11 22:18:48 +05:30
"name": fh.Name,
"path": fh.LocalPath,
"remote_url": fh.RemoteURL,
"remote_id": fh.RemoteID,
"size": strconv.FormatInt(fh.Size, 10),
"upload_duration": strconv.FormatFloat(fh.uploadDuration, 'f', -1, 64),
2021-02-22 17:27:13 +05:30
} {
data[key(k)] = v
signedData[k] = v
}
for hashName, hash := range fh.hashes {
data[key(hashName)] = hash
signedData[hashName] = hash
}
2022-07-23 23:45:48 +05:30
claims := uploadClaims{Upload: signedData, RegisteredClaims: secret.DefaultClaims}
2021-02-22 17:27:13 +05:30
jwtData, err := secret.JWTTokenString(claims)
if err != nil {
return nil, err
}
data[key("gitlab-workhorse-upload")] = jwtData
return data, nil
}
// consumer is the sink Upload streams the file content into. Each upload
// destination selected by Upload is used through this interface.
type consumer interface {
	// Consume reads the content from the reader and persists it, honoring the
	// given deadline. Upload stores the returned count as the persisted size.
	Consume(context.Context, io.Reader, time.Time) (int64, error)

	// ConsumeWithoutDelete has the same signature as Consume and is the
	// variant Upload calls when the SkipDelete option is set.
	// NOTE(review): presumably it skips the cleanup that Consume arranges;
	// confirm against the implementations.
	ConsumeWithoutDelete(context.Context, io.Reader, time.Time) (int64, error)
}
2022-05-07 20:08:51 +05:30
// Upload persists the provided reader content to all the location specified in opts. A cleanup will be performed once ctx is Done
2021-02-22 17:27:13 +05:30
// Make sure the provided context will not expire before finalizing upload with GitLab Rails.
2022-07-23 23:45:48 +05:30
func Upload(ctx context.Context, reader io.Reader, size int64, name string, opts *UploadOpts) (*FileHandler, error) {
2021-12-11 22:18:48 +05:30
fh := &FileHandler{
2022-07-23 23:45:48 +05:30
Name: name,
2021-02-22 17:27:13 +05:30
RemoteID: opts.RemoteID,
RemoteURL: opts.RemoteURL,
}
2021-12-11 22:18:48 +05:30
uploadStartTime := time.Now()
defer func() { fh.uploadDuration = time.Since(uploadStartTime).Seconds() }()
2023-03-17 16:20:25 +05:30
hashes := newMultiHash(opts.UploadHashFunctions)
2021-02-22 17:27:13 +05:30
reader = io.TeeReader(reader, hashes.Writer)
var clientMode string
2021-12-11 22:18:48 +05:30
var uploadDestination consumer
var err error
2021-02-22 17:27:13 +05:30
switch {
2022-06-21 17:19:12 +05:30
// This case means Workhorse is acting as an upload proxy for Rails and buffers files
// to disk in a temporary location, see:
// https://docs.gitlab.com/ee/development/uploads/background.html#moving-disk-buffering-to-workhorse
case opts.IsLocalTempFile():
clientMode = "local_tempfile"
2022-05-07 20:08:51 +05:30
uploadDestination, err = fh.newLocalFile(ctx, opts)
2022-06-21 17:19:12 +05:30
// All cases below mean we are doing a direct upload to remote i.e. object storage, see:
// https://docs.gitlab.com/ee/development/uploads/background.html#moving-to-object-storage-and-direct-uploads
2021-02-22 17:27:13 +05:30
case opts.UseWorkhorseClientEnabled() && opts.ObjectStorageConfig.IsGoCloud():
clientMode = fmt.Sprintf("go_cloud:%s", opts.ObjectStorageConfig.Provider)
p := &objectstore.GoCloudObjectParams{
Ctx: ctx,
Mux: opts.ObjectStorageConfig.URLMux,
BucketURL: opts.ObjectStorageConfig.GoCloudConfig.URL,
ObjectName: opts.RemoteTempObjectID,
}
uploadDestination, err = objectstore.NewGoCloudObject(p)
case opts.UseWorkhorseClientEnabled() && opts.ObjectStorageConfig.IsAWS() && opts.ObjectStorageConfig.IsValid():
2022-06-21 17:19:12 +05:30
clientMode = "s3_client"
2021-02-22 17:27:13 +05:30
uploadDestination, err = objectstore.NewS3Object(
opts.RemoteTempObjectID,
opts.ObjectStorageConfig.S3Credentials,
opts.ObjectStorageConfig.S3Config,
)
case opts.IsMultipart():
2022-06-21 17:19:12 +05:30
clientMode = "s3_multipart"
2021-02-22 17:27:13 +05:30
uploadDestination, err = objectstore.NewMultipart(
opts.PresignedParts,
opts.PresignedCompleteMultipart,
opts.PresignedAbortMultipart,
opts.PresignedDelete,
opts.PutHeaders,
opts.PartSize,
)
default:
2022-06-21 17:19:12 +05:30
clientMode = "presigned_put"
2021-02-22 17:27:13 +05:30
uploadDestination, err = objectstore.NewObject(
opts.PresignedPut,
opts.PresignedDelete,
opts.PutHeaders,
size,
)
}
if err != nil {
return nil, err
}
2021-12-11 22:18:48 +05:30
var hlr *hardLimitReader
2021-02-22 17:27:13 +05:30
if opts.MaximumSize > 0 {
if size > opts.MaximumSize {
return nil, SizeError(fmt.Errorf("the upload size %d is over maximum of %d bytes", size, opts.MaximumSize))
}
2021-12-11 22:18:48 +05:30
hlr = &hardLimitReader{r: reader, n: opts.MaximumSize}
2021-02-22 17:27:13 +05:30
reader = hlr
}
2023-03-17 16:20:25 +05:30
if opts.SkipDelete {
fh.Size, err = uploadDestination.ConsumeWithoutDelete(ctx, reader, opts.Deadline)
} else {
fh.Size, err = uploadDestination.Consume(ctx, reader, opts.Deadline)
}
2021-02-22 17:27:13 +05:30
if err != nil {
2021-12-11 22:18:48 +05:30
if (err == objectstore.ErrNotEnoughParts) || (hlr != nil && hlr.n < 0) {
2021-02-22 17:27:13 +05:30
err = ErrEntityTooLarge
}
return nil, err
}
if size != -1 && size != fh.Size {
return nil, SizeError(fmt.Errorf("expected %d bytes but got only %d", size, fh.Size))
}
logger := log.WithContextFields(ctx, log.Fields{
2022-07-23 23:45:48 +05:30
"copied_bytes": fh.Size,
"is_local": opts.IsLocalTempFile(),
"is_multipart": opts.IsMultipart(),
"is_remote": !opts.IsLocalTempFile(),
"remote_id": opts.RemoteID,
"client_mode": clientMode,
"filename": fh.Name,
2021-02-22 17:27:13 +05:30
})
2022-06-21 17:19:12 +05:30
if opts.IsLocalTempFile() {
2021-02-22 17:27:13 +05:30
logger = logger.WithField("local_temp_path", opts.LocalTempPath)
} else {
logger = logger.WithField("remote_temp_object", opts.RemoteTempObjectID)
}
logger.Info("saved file")
fh.hashes = hashes.finish()
return fh, nil
}
2022-05-07 20:08:51 +05:30
func (fh *FileHandler) newLocalFile(ctx context.Context, opts *UploadOpts) (consumer, error) {
2021-02-22 17:27:13 +05:30
// make sure TempFolder exists
err := os.MkdirAll(opts.LocalTempPath, 0700)
if err != nil {
2022-05-07 20:08:51 +05:30
return nil, fmt.Errorf("newLocalFile: mkdir %q: %v", opts.LocalTempPath, err)
2021-02-22 17:27:13 +05:30
}
2022-07-23 23:45:48 +05:30
file, err := os.CreateTemp(opts.LocalTempPath, "gitlab-workhorse-upload")
2021-02-22 17:27:13 +05:30
if err != nil {
2022-05-07 20:08:51 +05:30
return nil, fmt.Errorf("newLocalFile: create file: %v", err)
2021-02-22 17:27:13 +05:30
}
go func() {
<-ctx.Done()
os.Remove(file.Name())
}()
fh.LocalPath = file.Name()
2022-05-07 20:08:51 +05:30
return &filestore.LocalFile{File: file}, nil
2021-02-22 17:27:13 +05:30
}