debian-mirror-gitlab/workhorse/internal/upstream/routes.go
2022-07-23 20:15:48 +02:00

419 lines
15 KiB
Go

package upstream
import (
"net/http"
"net/url"
"path"
"regexp"
"github.com/gorilla/websocket"
"gitlab.com/gitlab-org/labkit/log"
"gitlab.com/gitlab-org/labkit/tracing"
apipkg "gitlab.com/gitlab-org/gitlab/workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/artifacts"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/builds"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/channel"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/dependencyproxy"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/git"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/helper"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/imageresizer"
proxypkg "gitlab.com/gitlab-org/gitlab/workhorse/internal/proxy"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/queueing"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/redis"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/secret"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/senddata"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/sendfile"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/sendurl"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/staticpages"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/upload"
)
type matcherFunc func(*http.Request) bool
type routeEntry struct {
method string
regex *regexp.Regexp
handler http.Handler
matchers []matcherFunc
}
type routeOptions struct {
tracing bool
isGeoProxyRoute bool
matchers []matcherFunc
}
const (
apiPattern = `^/api/`
ciAPIPattern = `^/ci/api/`
gitProjectPattern = `^/.+\.git/`
geoGitProjectPattern = `^/[^-].+\.git/` // Prevent matching routes like /-/push_from_secondary
projectPattern = `^/([^/]+/){1,}[^/]+/`
apiProjectPattern = apiPattern + `v4/projects/[^/]+/` // API: Projects can be encoded via group%2Fsubgroup%2Fproject
apiTopicPattern = apiPattern + `v4/topics`
snippetUploadPattern = `^/uploads/personal_snippet`
userUploadPattern = `^/uploads/user`
importPattern = `^/import/`
)
var (
// For legacy reasons, user uploads are stored in public/uploads. To
// prevent anybody who knows/guesses the URL of a user-uploaded file
// from downloading it we configure static.ServeExisting to treat files
// under public/uploads/ as if they do not exist.
staticExclude = []string{"/uploads/"}
)
func compileRegexp(regexpStr string) *regexp.Regexp {
if len(regexpStr) == 0 {
return nil
}
return regexp.MustCompile(regexpStr)
}
func withMatcher(f matcherFunc) func(*routeOptions) {
return func(options *routeOptions) {
options.matchers = append(options.matchers, f)
}
}
func withoutTracing() func(*routeOptions) {
return func(options *routeOptions) {
options.tracing = false
}
}
func withGeoProxy() func(*routeOptions) {
return func(options *routeOptions) {
options.isGeoProxyRoute = true
}
}
func (u *upstream) observabilityMiddlewares(handler http.Handler, method string, regexpStr string, opts *routeOptions) http.Handler {
handler = log.AccessLogger(
handler,
log.WithAccessLogger(u.accessLogger),
log.WithExtraFields(func(r *http.Request) log.Fields {
return log.Fields{
"route": regexpStr, // This field matches the `route` label in Prometheus metrics
}
}),
)
handler = instrumentRoute(handler, method, regexpStr) // Add prometheus metrics
if opts != nil && opts.isGeoProxyRoute {
handler = instrumentGeoProxyRoute(handler, method, regexpStr) // Add Geo prometheus metrics
}
return handler
}
func (u *upstream) route(method, regexpStr string, handler http.Handler, opts ...func(*routeOptions)) routeEntry {
// Instantiate a route with the defaults
options := routeOptions{
tracing: true,
}
for _, f := range opts {
f(&options)
}
handler = u.observabilityMiddlewares(handler, method, regexpStr, &options)
handler = denyWebsocket(handler) // Disallow websockets
if options.tracing {
// Add distributed tracing
handler = tracing.Handler(handler, tracing.WithRouteIdentifier(regexpStr))
}
return routeEntry{
method: method,
regex: compileRegexp(regexpStr),
handler: handler,
matchers: options.matchers,
}
}
func (u *upstream) wsRoute(regexpStr string, handler http.Handler, matchers ...matcherFunc) routeEntry {
method := "GET"
handler = u.observabilityMiddlewares(handler, method, regexpStr, nil)
return routeEntry{
method: method,
regex: compileRegexp(regexpStr),
handler: handler,
matchers: append(matchers, websocket.IsWebSocketUpgrade),
}
}
// Creates matcherFuncs for a particular content type.
func isContentType(contentType string) func(*http.Request) bool {
return func(r *http.Request) bool {
return helper.IsContentType(contentType, r.Header.Get("Content-Type"))
}
}
func (ro *routeEntry) isMatch(cleanedPath string, req *http.Request) bool {
if ro.method != "" && req.Method != ro.method {
return false
}
if ro.regex != nil && !ro.regex.MatchString(cleanedPath) {
return false
}
ok := true
for _, matcher := range ro.matchers {
ok = matcher(req)
if !ok {
break
}
}
return ok
}
func buildProxy(backend *url.URL, version string, rt http.RoundTripper, cfg config.Config, dependencyProxyInjector *dependencyproxy.Injector) http.Handler {
proxier := proxypkg.NewProxy(backend, version, rt)
return senddata.SendData(
sendfile.SendFile(apipkg.Block(proxier)),
git.SendArchive,
git.SendBlob,
git.SendDiff,
git.SendPatch,
git.SendSnapshot,
artifacts.SendEntry,
sendurl.SendURL,
imageresizer.NewResizer(cfg),
dependencyProxyInjector,
)
}
// Routing table
// We match against URI not containing the relativeUrlRoot:
// see upstream.ServeHTTP
func configureRoutes(u *upstream) {
api := u.APIClient
static := &staticpages.Static{DocumentRoot: u.DocumentRoot, Exclude: staticExclude}
dependencyProxyInjector := dependencyproxy.NewInjector()
proxy := buildProxy(u.Backend, u.Version, u.RoundTripper, u.Config, dependencyProxyInjector)
cableProxy := proxypkg.NewProxy(u.CableBackend, u.Version, u.CableRoundTripper)
assetsNotFoundHandler := NotFoundUnless(u.DevelopmentMode, proxy)
if u.AltDocumentRoot != "" {
altStatic := &staticpages.Static{DocumentRoot: u.AltDocumentRoot, Exclude: staticExclude}
assetsNotFoundHandler = altStatic.ServeExisting(
u.URLPrefix,
staticpages.CacheExpireMax,
NotFoundUnless(u.DevelopmentMode, proxy),
)
}
signingTripper := secret.NewRoundTripper(u.RoundTripper, u.Version)
signingProxy := buildProxy(u.Backend, u.Version, signingTripper, u.Config, dependencyProxyInjector)
preparer := upload.NewObjectStoragePreparer(u.Config)
requestBodyUploader := upload.RequestBody(api, signingProxy, preparer)
mimeMultipartUploader := upload.Multipart(api, signingProxy, preparer)
uploadPath := path.Join(u.DocumentRoot, "uploads/tmp")
tempfileMultipartProxy := upload.SkipRailsPreAuthMultipart(uploadPath, api, proxy, preparer)
ciAPIProxyQueue := queueing.QueueRequests("ci_api_job_requests", tempfileMultipartProxy, u.APILimit, u.APIQueueLimit, u.APIQueueTimeout)
ciAPILongPolling := builds.RegisterHandler(ciAPIProxyQueue, redis.WatchKey, u.APICILongPollingDuration)
dependencyProxyInjector.SetUploadHandler(requestBodyUploader)
// Serve static files or forward the requests
defaultUpstream := static.ServeExisting(
u.URLPrefix,
staticpages.CacheDisabled,
static.DeployPage(static.ErrorPagesUnless(u.DevelopmentMode, staticpages.ErrorFormatHTML, tempfileMultipartProxy)),
)
probeUpstream := static.ErrorPagesUnless(u.DevelopmentMode, staticpages.ErrorFormatJSON, proxy)
healthUpstream := static.ErrorPagesUnless(u.DevelopmentMode, staticpages.ErrorFormatText, proxy)
u.Routes = []routeEntry{
// Git Clone
u.route("GET", gitProjectPattern+`info/refs\z`, git.GetInfoRefsHandler(api)),
u.route("POST", gitProjectPattern+`git-upload-pack\z`, contentEncodingHandler(git.UploadPack(api)), withMatcher(isContentType("application/x-git-upload-pack-request"))),
u.route("POST", gitProjectPattern+`git-receive-pack\z`, contentEncodingHandler(git.ReceivePack(api)), withMatcher(isContentType("application/x-git-receive-pack-request"))),
u.route("PUT", gitProjectPattern+`gitlab-lfs/objects/([0-9a-f]{64})/([0-9]+)\z`, requestBodyUploader, withMatcher(isContentType("application/octet-stream"))),
// CI Artifacts
u.route("POST", apiPattern+`v4/jobs/[0-9]+/artifacts\z`, contentEncodingHandler(upload.Artifacts(api, signingProxy, preparer))),
u.route("POST", ciAPIPattern+`v1/builds/[0-9]+/artifacts\z`, contentEncodingHandler(upload.Artifacts(api, signingProxy, preparer))),
// ActionCable websocket
u.wsRoute(`^/-/cable\z`, cableProxy),
// Terminal websocket
u.wsRoute(projectPattern+`-/environments/[0-9]+/terminal.ws\z`, channel.Handler(api)),
u.wsRoute(projectPattern+`-/jobs/[0-9]+/terminal.ws\z`, channel.Handler(api)),
// Proxy Job Services
u.wsRoute(projectPattern+`-/jobs/[0-9]+/proxy.ws\z`, channel.Handler(api)),
// Long poll and limit capacity given to jobs/request and builds/register.json
u.route("", apiPattern+`v4/jobs/request\z`, ciAPILongPolling),
u.route("", ciAPIPattern+`v1/builds/register.json\z`, ciAPILongPolling),
// Not all API endpoints support encoded project IDs
// (e.g. `group%2Fproject`), but for the sake of consistency we
// use the apiProjectPattern regex throughout. API endpoints
// that do not support this will return 400 regardless of
// whether they are accelerated by Workhorse or not. See
// https://gitlab.com/gitlab-org/gitlab/-/merge_requests/56731.
// Maven Artifact Repository
u.route("PUT", apiProjectPattern+`packages/maven/`, requestBodyUploader),
// Conan Artifact Repository
u.route("PUT", apiPattern+`v4/packages/conan/`, requestBodyUploader),
u.route("PUT", apiProjectPattern+`packages/conan/`, requestBodyUploader),
// Generic Packages Repository
u.route("PUT", apiProjectPattern+`packages/generic/`, requestBodyUploader),
// NuGet Artifact Repository
u.route("PUT", apiProjectPattern+`packages/nuget/`, mimeMultipartUploader),
// PyPI Artifact Repository
u.route("POST", apiProjectPattern+`packages/pypi`, mimeMultipartUploader),
// Debian Artifact Repository
u.route("PUT", apiProjectPattern+`packages/debian/`, requestBodyUploader),
// Gem Artifact Repository
u.route("POST", apiProjectPattern+`packages/rubygems/`, requestBodyUploader),
// Terraform Module Package Repository
u.route("PUT", apiProjectPattern+`packages/terraform/modules/`, requestBodyUploader),
// Helm Artifact Repository
u.route("POST", apiProjectPattern+`packages/helm/api/[^/]+/charts\z`, mimeMultipartUploader),
// We are porting API to disk acceleration
// we need to declare each routes until we have fixed all the routes on the rails codebase.
// Overall status can be seen at https://gitlab.com/groups/gitlab-org/-/epics/1802#current-status
u.route("POST", apiProjectPattern+`wikis/attachments\z`, tempfileMultipartProxy),
u.route("POST", apiPattern+`graphql\z`, tempfileMultipartProxy),
u.route("POST", apiTopicPattern, tempfileMultipartProxy),
u.route("PUT", apiTopicPattern, tempfileMultipartProxy),
u.route("POST", apiPattern+`v4/groups/import`, mimeMultipartUploader),
u.route("POST", apiPattern+`v4/projects/import`, mimeMultipartUploader),
// Project Import via UI upload acceleration
u.route("POST", importPattern+`gitlab_project`, mimeMultipartUploader),
// Group Import via UI upload acceleration
u.route("POST", importPattern+`gitlab_group`, mimeMultipartUploader),
// Issuable Metric image upload
u.route("POST", apiProjectPattern+`issues/[0-9]+/metric_images\z`, mimeMultipartUploader),
// Alert Metric image upload
u.route("POST", apiProjectPattern+`alert_management_alerts/[0-9]+/metric_images\z`, mimeMultipartUploader),
// Requirements Import via UI upload acceleration
u.route("POST", projectPattern+`requirements_management/requirements/import_csv`, mimeMultipartUploader),
// Uploads via API
u.route("POST", apiProjectPattern+`uploads\z`, mimeMultipartUploader),
// Explicitly proxy API requests
u.route("", apiPattern, proxy),
u.route("", ciAPIPattern, proxy),
// Serve assets
u.route(
"", `^/assets/`,
static.ServeExisting(
u.URLPrefix,
staticpages.CacheExpireMax,
assetsNotFoundHandler,
),
withoutTracing(), // Tracing on assets is very noisy
),
// Uploads
u.route("POST", projectPattern+`uploads\z`, mimeMultipartUploader),
u.route("POST", snippetUploadPattern, mimeMultipartUploader),
u.route("POST", userUploadPattern, mimeMultipartUploader),
// health checks don't intercept errors and go straight to rails
// TODO: We should probably not return a HTML deploy page?
// https://gitlab.com/gitlab-org/gitlab/-/issues/336326
u.route("", "^/-/(readiness|liveness)$", static.DeployPage(probeUpstream)),
u.route("", "^/-/health$", static.DeployPage(healthUpstream)),
// This route lets us filter out health checks from our metrics.
u.route("", "^/-/", defaultUpstream),
u.route("", "", defaultUpstream),
}
// Routes which should actually be served locally by a Geo Proxy. If none
// matches, then then proxy the request.
u.geoLocalRoutes = []routeEntry{
// Git and LFS requests
//
// Note that Geo already redirects pushes, with special terminal output.
// Note that excessive secondary lag can cause unexpected behavior since
// pulls are performed against a different source of truth. Ideally, we'd
// proxy/redirect pulls as well, when the secondary is not up-to-date.
//
u.route("GET", geoGitProjectPattern+`info/refs\z`, git.GetInfoRefsHandler(api)),
u.route("POST", geoGitProjectPattern+`git-upload-pack\z`, contentEncodingHandler(git.UploadPack(api)), withMatcher(isContentType("application/x-git-upload-pack-request"))),
u.route("GET", geoGitProjectPattern+`gitlab-lfs/objects/([0-9a-f]{64})\z`, defaultUpstream),
// Serve health checks from this Geo secondary
u.route("", "^/-/(readiness|liveness)$", static.DeployPage(probeUpstream)),
u.route("", "^/-/health$", static.DeployPage(healthUpstream)),
u.route("", "^/-/metrics$", defaultUpstream),
// Authentication routes
u.route("", "^/users/auth/geo/(sign_in|sign_out)$", defaultUpstream),
u.route("", "^/oauth/geo/(auth|callback|logout)$", defaultUpstream),
// Admin Area > Geo routes
u.route("", "^/admin/geo/replication/projects", defaultUpstream),
u.route("", "^/admin/geo/replication/designs", defaultUpstream),
// Geo API routes
u.route("", "^/api/v4/geo_replication", defaultUpstream),
u.route("", "^/api/v4/geo/proxy_git_ssh", defaultUpstream),
u.route("", "^/api/v4/geo/graphql", defaultUpstream),
// Internal API routes
u.route("", "^/api/v4/internal", defaultUpstream),
u.route(
"", `^/assets/`,
static.ServeExisting(
u.URLPrefix,
staticpages.CacheExpireMax,
assetsNotFoundHandler,
),
withoutTracing(), // Tracing on assets is very noisy
),
// Don't define a catch-all route. If a route does not match, then we know
// the request should be proxied.
}
}
func denyWebsocket(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if websocket.IsWebSocketUpgrade(r) {
helper.HTTPError(w, r, "websocket upgrade not allowed", http.StatusBadRequest)
return
}
next.ServeHTTP(w, r)
})
}