debian-mirror-gitlab/workhorse/internal/git/archive.go
2021-11-18 22:05:49 +05:30

217 lines
6.2 KiB
Go

/*
In this file we handle 'git archive' downloads
*/
package git
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path"
"path/filepath"
"regexp"
"time"
"github.com/golang/protobuf/proto" //lint:ignore SA1019 https://gitlab.com/gitlab-org/gitlab/-/issues/324868
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/gitaly"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/helper"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/log"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/senddata"
)
type archive struct{ senddata.Prefix }
type archiveParams struct {
ArchivePath string
ArchivePrefix string
CommitId string
GitalyServer gitaly.Server
GitalyRepository gitalypb.Repository
DisableCache bool
GetArchiveRequest []byte
}
var (
SendArchive = &archive{"git-archive:"}
gitArchiveCache = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "gitlab_workhorse_git_archive_cache",
Help: "Cache hits and misses for 'git archive' streaming",
},
[]string{"result"},
)
)
func (a *archive) Inject(w http.ResponseWriter, r *http.Request, sendData string) {
var params archiveParams
if err := a.Unpack(&params, sendData); err != nil {
helper.Fail500(w, r, fmt.Errorf("SendArchive: unpack sendData: %v", err))
return
}
urlPath := r.URL.Path
format, ok := parseBasename(filepath.Base(urlPath))
if !ok {
helper.Fail500(w, r, fmt.Errorf("SendArchive: invalid format: %s", urlPath))
return
}
cacheEnabled := !params.DisableCache
archiveFilename := path.Base(params.ArchivePath)
if cacheEnabled {
cachedArchive, err := os.Open(params.ArchivePath)
if err == nil {
defer cachedArchive.Close()
gitArchiveCache.WithLabelValues("hit").Inc()
setArchiveHeaders(w, format, archiveFilename)
// Even if somebody deleted the cachedArchive from disk since we opened
// the file, Unix file semantics guarantee we can still read from the
// open file in this process.
http.ServeContent(w, r, "", time.Unix(0, 0), cachedArchive)
return
}
}
gitArchiveCache.WithLabelValues("miss").Inc()
var tempFile *os.File
var err error
if cacheEnabled {
// We assume the tempFile has a unique name so that concurrent requests are
// safe. We create the tempfile in the same directory as the final cached
// archive we want to create so that we can use an atomic link(2) operation
// to finalize the cached archive.
tempFile, err = prepareArchiveTempfile(path.Dir(params.ArchivePath), archiveFilename)
if err != nil {
helper.Fail500(w, r, fmt.Errorf("SendArchive: create tempfile: %v", err))
return
}
defer tempFile.Close()
defer os.Remove(tempFile.Name())
}
var archiveReader io.Reader
archiveReader, err = handleArchiveWithGitaly(r, &params, format)
if err != nil {
helper.Fail500(w, r, fmt.Errorf("operations.GetArchive: %v", err))
return
}
reader := archiveReader
if cacheEnabled {
reader = io.TeeReader(archiveReader, tempFile)
}
// Start writing the response
setArchiveHeaders(w, format, archiveFilename)
w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return
if _, err := io.Copy(w, reader); err != nil {
log.WithRequest(r).WithError(&copyError{fmt.Errorf("SendArchive: copy 'git archive' output: %v", err)}).Error()
return
}
if cacheEnabled {
err := finalizeCachedArchive(tempFile, params.ArchivePath)
if err != nil {
log.WithRequest(r).WithError(fmt.Errorf("SendArchive: finalize cached archive: %v", err)).Error()
return
}
}
}
func handleArchiveWithGitaly(r *http.Request, params *archiveParams, format gitalypb.GetArchiveRequest_Format) (io.Reader, error) {
var request *gitalypb.GetArchiveRequest
ctx, c, err := gitaly.NewRepositoryClient(r.Context(), params.GitalyServer)
if err != nil {
return nil, err
}
if params.GetArchiveRequest != nil {
request = &gitalypb.GetArchiveRequest{}
if err := proto.Unmarshal(params.GetArchiveRequest, request); err != nil {
return nil, fmt.Errorf("unmarshal GetArchiveRequest: %v", err)
}
} else {
request = &gitalypb.GetArchiveRequest{
Repository: &params.GitalyRepository,
CommitId: params.CommitId,
Prefix: params.ArchivePrefix,
Format: format,
}
}
return c.ArchiveReader(ctx, request)
}
func setArchiveHeaders(w http.ResponseWriter, format gitalypb.GetArchiveRequest_Format, archiveFilename string) {
w.Header().Del("Content-Length")
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, archiveFilename))
// Caching proxies usually don't cache responses with Set-Cookie header
// present because it implies user-specific data, which is not the case
// for repository archives.
w.Header().Del("Set-Cookie")
if format == gitalypb.GetArchiveRequest_ZIP {
w.Header().Set("Content-Type", "application/zip")
} else {
w.Header().Set("Content-Type", "application/octet-stream")
}
w.Header().Set("Content-Transfer-Encoding", "binary")
}
func prepareArchiveTempfile(dir string, prefix string) (*os.File, error) {
if err := os.MkdirAll(dir, 0700); err != nil {
return nil, err
}
return ioutil.TempFile(dir, prefix)
}
func finalizeCachedArchive(tempFile *os.File, archivePath string) error {
if err := tempFile.Close(); err != nil {
return err
}
if err := os.Link(tempFile.Name(), archivePath); err != nil && !os.IsExist(err) {
return err
}
return nil
}
var (
patternZip = regexp.MustCompile(`\.zip$`)
patternTar = regexp.MustCompile(`\.tar$`)
patternTarGz = regexp.MustCompile(`\.(tar\.gz|tgz|gz)$`)
patternTarBz2 = regexp.MustCompile(`\.(tar\.bz2|tbz|tbz2|tb2|bz2)$`)
)
func parseBasename(basename string) (gitalypb.GetArchiveRequest_Format, bool) {
var format gitalypb.GetArchiveRequest_Format
switch {
case (basename == "archive"):
format = gitalypb.GetArchiveRequest_TAR_GZ
case patternZip.MatchString(basename):
format = gitalypb.GetArchiveRequest_ZIP
case patternTar.MatchString(basename):
format = gitalypb.GetArchiveRequest_TAR
case patternTarGz.MatchString(basename):
format = gitalypb.GetArchiveRequest_TAR_GZ
case patternTarBz2.MatchString(basename):
format = gitalypb.GetArchiveRequest_TAR_BZ2
default:
return format, false
}
return format, true
}