debian-mirror-gitlab/workhorse/internal/upload/destination/destination.go
2022-07-23 20:15:48 +02:00

240 lines
7.1 KiB
Go

// The destination package handles uploading to a specific destination (delegates
// to filestore or objectstore packages) based on options from the pre-authorization
// API and finalizing the upload.
package destination
import (
"context"
"errors"
"fmt"
"io"
"os"
"strconv"
"time"
"github.com/golang-jwt/jwt/v4"
"gitlab.com/gitlab-org/labkit/log"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/secret"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination/filestore"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination/objectstore"
)
type SizeError error
// ErrEntityTooLarge means that the uploaded content is bigger then maximum allowed size
var ErrEntityTooLarge = errors.New("entity is too large")
// FileHandler represent a file that has been processed for upload
// it may be either uploaded to an ObjectStore and/or saved on local path.
type FileHandler struct {
// LocalPath is the path on the disk where file has been stored
LocalPath string
// RemoteID is the objectID provided by GitLab Rails
RemoteID string
// RemoteURL is ObjectStore URL provided by GitLab Rails
RemoteURL string
// Size is the persisted file size
Size int64
// Name is the resource name to send back to GitLab rails.
// It differ from the real file name in order to avoid file collisions
Name string
// a map containing different hashes
hashes map[string]string
// Duration of upload in seconds
uploadDuration float64
}
type uploadClaims struct {
Upload map[string]string `json:"upload"`
jwt.RegisteredClaims
}
// SHA256 hash of the handled file
func (fh *FileHandler) SHA256() string {
return fh.hashes["sha256"]
}
// MD5 hash of the handled file
func (fh *FileHandler) MD5() string {
return fh.hashes["md5"]
}
// GitLabFinalizeFields returns a map with all the fields GitLab Rails needs in order to finalize the upload.
func (fh *FileHandler) GitLabFinalizeFields(prefix string) (map[string]string, error) {
// TODO: remove `data` these once rails fully and exclusively support `signedData` (https://gitlab.com/gitlab-org/gitlab/-/issues/324873)
data := make(map[string]string)
signedData := make(map[string]string)
key := func(field string) string {
if prefix == "" {
return field
}
return fmt.Sprintf("%s.%s", prefix, field)
}
for k, v := range map[string]string{
"name": fh.Name,
"path": fh.LocalPath,
"remote_url": fh.RemoteURL,
"remote_id": fh.RemoteID,
"size": strconv.FormatInt(fh.Size, 10),
"upload_duration": strconv.FormatFloat(fh.uploadDuration, 'f', -1, 64),
} {
data[key(k)] = v
signedData[k] = v
}
for hashName, hash := range fh.hashes {
data[key(hashName)] = hash
signedData[hashName] = hash
}
claims := uploadClaims{Upload: signedData, RegisteredClaims: secret.DefaultClaims}
jwtData, err := secret.JWTTokenString(claims)
if err != nil {
return nil, err
}
data[key("gitlab-workhorse-upload")] = jwtData
return data, nil
}
type consumer interface {
Consume(context.Context, io.Reader, time.Time) (int64, error)
}
// Upload persists the provided reader content to all the location specified in opts. A cleanup will be performed once ctx is Done
// Make sure the provided context will not expire before finalizing upload with GitLab Rails.
func Upload(ctx context.Context, reader io.Reader, size int64, name string, opts *UploadOpts) (*FileHandler, error) {
fh := &FileHandler{
Name: name,
RemoteID: opts.RemoteID,
RemoteURL: opts.RemoteURL,
}
uploadStartTime := time.Now()
defer func() { fh.uploadDuration = time.Since(uploadStartTime).Seconds() }()
hashes := newMultiHash()
reader = io.TeeReader(reader, hashes.Writer)
var clientMode string
var uploadDestination consumer
var err error
switch {
// This case means Workhorse is acting as an upload proxy for Rails and buffers files
// to disk in a temporary location, see:
// https://docs.gitlab.com/ee/development/uploads/background.html#moving-disk-buffering-to-workhorse
case opts.IsLocalTempFile():
clientMode = "local_tempfile"
uploadDestination, err = fh.newLocalFile(ctx, opts)
// All cases below mean we are doing a direct upload to remote i.e. object storage, see:
// https://docs.gitlab.com/ee/development/uploads/background.html#moving-to-object-storage-and-direct-uploads
case opts.UseWorkhorseClientEnabled() && opts.ObjectStorageConfig.IsGoCloud():
clientMode = fmt.Sprintf("go_cloud:%s", opts.ObjectStorageConfig.Provider)
p := &objectstore.GoCloudObjectParams{
Ctx: ctx,
Mux: opts.ObjectStorageConfig.URLMux,
BucketURL: opts.ObjectStorageConfig.GoCloudConfig.URL,
ObjectName: opts.RemoteTempObjectID,
}
uploadDestination, err = objectstore.NewGoCloudObject(p)
case opts.UseWorkhorseClientEnabled() && opts.ObjectStorageConfig.IsAWS() && opts.ObjectStorageConfig.IsValid():
clientMode = "s3_client"
uploadDestination, err = objectstore.NewS3Object(
opts.RemoteTempObjectID,
opts.ObjectStorageConfig.S3Credentials,
opts.ObjectStorageConfig.S3Config,
)
case opts.IsMultipart():
clientMode = "s3_multipart"
uploadDestination, err = objectstore.NewMultipart(
opts.PresignedParts,
opts.PresignedCompleteMultipart,
opts.PresignedAbortMultipart,
opts.PresignedDelete,
opts.PutHeaders,
opts.PartSize,
)
default:
clientMode = "presigned_put"
uploadDestination, err = objectstore.NewObject(
opts.PresignedPut,
opts.PresignedDelete,
opts.PutHeaders,
size,
)
}
if err != nil {
return nil, err
}
var hlr *hardLimitReader
if opts.MaximumSize > 0 {
if size > opts.MaximumSize {
return nil, SizeError(fmt.Errorf("the upload size %d is over maximum of %d bytes", size, opts.MaximumSize))
}
hlr = &hardLimitReader{r: reader, n: opts.MaximumSize}
reader = hlr
}
fh.Size, err = uploadDestination.Consume(ctx, reader, opts.Deadline)
if err != nil {
if (err == objectstore.ErrNotEnoughParts) || (hlr != nil && hlr.n < 0) {
err = ErrEntityTooLarge
}
return nil, err
}
if size != -1 && size != fh.Size {
return nil, SizeError(fmt.Errorf("expected %d bytes but got only %d", size, fh.Size))
}
logger := log.WithContextFields(ctx, log.Fields{
"copied_bytes": fh.Size,
"is_local": opts.IsLocalTempFile(),
"is_multipart": opts.IsMultipart(),
"is_remote": !opts.IsLocalTempFile(),
"remote_id": opts.RemoteID,
"client_mode": clientMode,
"filename": fh.Name,
})
if opts.IsLocalTempFile() {
logger = logger.WithField("local_temp_path", opts.LocalTempPath)
} else {
logger = logger.WithField("remote_temp_object", opts.RemoteTempObjectID)
}
logger.Info("saved file")
fh.hashes = hashes.finish()
return fh, nil
}
func (fh *FileHandler) newLocalFile(ctx context.Context, opts *UploadOpts) (consumer, error) {
// make sure TempFolder exists
err := os.MkdirAll(opts.LocalTempPath, 0700)
if err != nil {
return nil, fmt.Errorf("newLocalFile: mkdir %q: %v", opts.LocalTempPath, err)
}
file, err := os.CreateTemp(opts.LocalTempPath, "gitlab-workhorse-upload")
if err != nil {
return nil, fmt.Errorf("newLocalFile: create file: %v", err)
}
go func() {
<-ctx.Done()
os.Remove(file.Name())
}()
fh.LocalPath = file.Name()
return &filestore.LocalFile{File: file}, nil
}