/*
In this file we handle 'git archive' downloads
*/
package git

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"time"

	"github.com/golang/protobuf/proto" //lint:ignore SA1019 https://gitlab.com/gitlab-org/gitlab/-/issues/324868
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"

	"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"

	"gitlab.com/gitlab-org/gitlab/workhorse/internal/api"
	"gitlab.com/gitlab-org/gitlab/workhorse/internal/gitaly"
	"gitlab.com/gitlab-org/gitlab/workhorse/internal/helper/fail"
	"gitlab.com/gitlab-org/gitlab/workhorse/internal/log"
	"gitlab.com/gitlab-org/gitlab/workhorse/internal/senddata"
)

type archive struct{ senddata.Prefix }
type archiveParams struct {
	ArchivePath       string
	ArchivePrefix     string
	CommitId          string
	GitalyServer      api.GitalyServer
	GitalyRepository  gitalypb.Repository
	DisableCache      bool
	GetArchiveRequest []byte
}

var (
	SendArchive     = &archive{"git-archive:"}
	gitArchiveCache = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "gitlab_workhorse_git_archive_cache",
			Help: "Cache hits and misses for 'git archive' streaming",
		},
		[]string{"result"},
	)
)
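
// Inject handles a 'git-archive:' senddata response. It serves the archive
// from the local cache when one exists; otherwise it streams the archive from
// Gitaly and, when caching is enabled, tees a copy to disk for future requests.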
func (a *archive) Inject(w http.ResponseWriter, r *http.Request, sendData string) {
	var params archiveParams
	if err := a.Unpack(&params, sendData); err != nil {
		fail.Request(w, r, fmt.Errorf("SendArchive: unpack sendData: %v", err))
		return
	}

	urlPath := r.URL.Path
	format, ok := parseBasename(filepath.Base(urlPath))
	if !ok {
		fail.Request(w, r, fmt.Errorf("SendArchive: invalid format: %s", urlPath))
		return
	}

	cacheEnabled := !params.DisableCache
	archiveFilename := path.Base(params.ArchivePath)

	if cacheEnabled {
		cachedArchive, err := os.Open(params.ArchivePath)
		if err == nil {
			defer cachedArchive.Close()
			gitArchiveCache.WithLabelValues("hit").Inc()
			setArchiveHeaders(w, format, archiveFilename)
			// Even if somebody deleted the cachedArchive from disk since we opened
			// the file, Unix file semantics guarantee we can still read from the
			// open file in this process.
			http.ServeContent(w, r, "", time.Unix(0, 0), cachedArchive)
			return
		}
	}

	gitArchiveCache.WithLabelValues("miss").Inc()

	var tempFile *os.File
	var err error

	if cacheEnabled {
		// We assume the tempFile has a unique name so that concurrent requests are
		// safe. We create the tempfile in the same directory as the final cached
		// archive we want to create so that we can use an atomic link(2) operation
		// to finalize the cached archive.
		tempFile, err = prepareArchiveTempfile(path.Dir(params.ArchivePath), archiveFilename)
		if err != nil {
			fail.Request(w, r, fmt.Errorf("SendArchive: create tempfile: %v", err))
			return
		}
		defer tempFile.Close()
		defer os.Remove(tempFile.Name())
	}

	var archiveReader io.Reader

	archiveReader, err = handleArchiveWithGitaly(r, &params, format)
	if err != nil {
		fail.Request(w, r, fmt.Errorf("operations.GetArchive: %v", err))
		return
	}

	reader := archiveReader
	if cacheEnabled {
		reader = io.TeeReader(archiveReader, tempFile)
	}

	// Start writing the response
	setArchiveHeaders(w, format, archiveFilename)
	w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return
	if _, err := io.Copy(w, reader); err != nil {
		log.WithRequest(r).WithError(&copyError{fmt.Errorf("SendArchive: copy 'git archive' output: %v", err)}).Error()
		return
	}

	if cacheEnabled {
		err := finalizeCachedArchive(tempFile, params.ArchivePath)
		if err != nil {
			log.WithRequest(r).WithError(fmt.Errorf("SendArchive: finalize cached archive: %v", err)).Error()
			return
		}
	}
}
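
// handleArchiveWithGitaly builds a GetArchiveRequest, either by unmarshalling
// the pre-encoded params.GetArchiveRequest or from the individual params
// fields, and returns a reader that streams the archive from Gitaly.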
func handleArchiveWithGitaly(r *http.Request, params *archiveParams, format gitalypb.GetArchiveRequest_Format) (io.Reader, error) {
	var request *gitalypb.GetArchiveRequest
	ctx, c, err := gitaly.NewRepositoryClient(r.Context(), params.GitalyServer)
	if err != nil {
		return nil, err
	}

	if params.GetArchiveRequest != nil {
		request = &gitalypb.GetArchiveRequest{}

		if err := proto.Unmarshal(params.GetArchiveRequest, request); err != nil {
			return nil, fmt.Errorf("unmarshal GetArchiveRequest: %v", err)
		}
	} else {
		request = &gitalypb.GetArchiveRequest{
			Repository: &params.GitalyRepository,
			CommitId:   params.CommitId,
			Prefix:     params.ArchivePrefix,
			Format:     format,
		}
	}

	return c.ArchiveReader(ctx, request)
}
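
// setArchiveHeaders prepares the response for streaming an archive: it drops
// Content-Length and Set-Cookie, sets the attachment filename, and picks a
// Content-Type based on the requested format.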
func setArchiveHeaders(w http.ResponseWriter, format gitalypb.GetArchiveRequest_Format, archiveFilename string) {
	w.Header().Del("Content-Length")
	w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, archiveFilename))
	// Caching proxies usually don't cache responses with Set-Cookie header
	// present because it implies user-specific data, which is not the case
	// for repository archives.
	w.Header().Del("Set-Cookie")
	if format == gitalypb.GetArchiveRequest_ZIP {
		w.Header().Set("Content-Type", "application/zip")
	} else {
		w.Header().Set("Content-Type", "application/octet-stream")
	}
	w.Header().Set("Content-Transfer-Encoding", "binary")
}
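
// prepareArchiveTempfile creates dir if needed and returns a uniquely named
// temporary file in it, so concurrent requests cannot clobber each other.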
func prepareArchiveTempfile(dir string, prefix string) (*os.File, error) {
	if err := os.MkdirAll(dir, 0700); err != nil {
		return nil, err
	}
	return os.CreateTemp(dir, prefix)
}
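
// finalizeCachedArchive closes the temporary file and promotes it to its final
// cache location with an atomic link(2). Losing the race against another
// request that already cached the archive is not an error.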
func finalizeCachedArchive(tempFile *os.File, archivePath string) error {
	if err := tempFile.Close(); err != nil {
		return err
	}
	if err := os.Link(tempFile.Name(), archivePath); err != nil && !os.IsExist(err) {
		return err
	}

	return nil
}

var (
	patternZip    = regexp.MustCompile(`\.zip$`)
	patternTar    = regexp.MustCompile(`\.tar$`)
	patternTarGz  = regexp.MustCompile(`\.(tar\.gz|tgz|gz)$`)
	patternTarBz2 = regexp.MustCompile(`\.(tar\.bz2|tbz|tbz2|tb2|bz2)$`)
)
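
// parseBasename maps the requested file name to a Gitaly archive format; the
// bare name "archive" defaults to tar.gz. It returns false for unknown
// extensions.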
func parseBasename(basename string) (gitalypb.GetArchiveRequest_Format, bool) {
	var format gitalypb.GetArchiveRequest_Format

	switch {
	case basename == "archive":
		format = gitalypb.GetArchiveRequest_TAR_GZ
	case patternZip.MatchString(basename):
		format = gitalypb.GetArchiveRequest_ZIP
	case patternTar.MatchString(basename):
		format = gitalypb.GetArchiveRequest_TAR
	case patternTarGz.MatchString(basename):
		format = gitalypb.GetArchiveRequest_TAR_GZ
	case patternTarBz2.MatchString(basename):
		format = gitalypb.GetArchiveRequest_TAR_BZ2
	default:
		return format, false
	}

	return format, true
}