diff --git a/models/git/lfs.go b/models/git/lfs.go
index a86e84c05..8d418b928 100644
--- a/models/git/lfs.go
+++ b/models/git/lfs.go
@@ -6,6 +6,7 @@ package git
 import (
 	"context"
 	"fmt"
+	"time"
 
 	"code.gitea.io/gitea/models/db"
 	"code.gitea.io/gitea/models/perm"
@@ -14,6 +15,7 @@ import (
 	user_model "code.gitea.io/gitea/models/user"
 	"code.gitea.io/gitea/modules/lfs"
 	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/setting"
 	"code.gitea.io/gitea/modules/timeutil"
 	"code.gitea.io/gitea/modules/util"
 
@@ -180,6 +182,12 @@ func GetLFSMetaObjectByOid(repoID int64, oid string) (*LFSMetaObject, error) {
 // RemoveLFSMetaObjectByOid removes a LFSMetaObject entry from database by its OID.
 // It may return ErrLFSObjectNotExist or a database error.
 func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
+	return RemoveLFSMetaObjectByOidFn(repoID, oid, nil)
+}
+
+// RemoveLFSMetaObjectByOidFn removes a LFSMetaObject entry from database by its OID.
+// It may return ErrLFSObjectNotExist or a database error. It will run Fn with the current count within the transaction
+func RemoveLFSMetaObjectByOidFn(repoID int64, oid string, fn func(count int64) error) (int64, error) {
 	if len(oid) == 0 {
 		return 0, ErrLFSObjectNotExist
 	}
@@ -200,6 +208,12 @@ func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
 		return count, err
 	}
 
+	if fn != nil {
+		if err := fn(count); err != nil {
+			return count, err
+		}
+	}
+
 	return count, committer.Commit()
 }
 
@@ -319,3 +333,47 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
 	}
 	return lfsSize, nil
 }
+
+// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
+type IterateLFSMetaObjectsForRepoOptions struct {
+	// OlderThan, if non-zero, restricts iteration to objects created before this time
+	OlderThan time.Time
+}
+
+// IterateLFSMetaObjectsForRepo provides an iterator for LFSMetaObjects per Repo.
+// f is called for each LFSMetaObject together with the number of LFSMetaObjects (in any repository) sharing its OID.
+// Iteration is keyed on the object ID so that f may delete the object it is passed. opts may be nil.
+func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
+	var lastID int64
+	batchSize := setting.Database.IterateBufferSize
+	engine := db.GetEngine(ctx)
+	type CountLFSMetaObject struct {
+		Count         int64
+		LFSMetaObject `xorm:"extends"`
+	}
+
+	for {
+		beans := make([]*CountLFSMetaObject, 0, batchSize)
+		// SELECT `lfs_meta_object`.*, COUNT(`l1`.oid) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? AND lfs_meta_object.id > ? GROUP BY lfs_meta_object.id ORDER BY lfs_meta_object.id ASC
+		sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
+			Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
+			Where("`lfs_meta_object`.repository_id = ? AND `lfs_meta_object`.id > ?", repoID, lastID)
+		if opts != nil && !opts.OlderThan.IsZero() {
+			sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan.Unix())
+		}
+		sess.GroupBy("`lfs_meta_object`.id").OrderBy("`lfs_meta_object`.id ASC")
+		if err := sess.Limit(batchSize).Find(&beans); err != nil {
+			return err
+		}
+		if len(beans) == 0 {
+			return nil
+		}
+		lastID = beans[len(beans)-1].ID
+
+		for _, bean := range beans {
+			if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
+				return err
+			}
+		}
+	}
+}
diff --git a/modules/doctor/lfs.go b/modules/doctor/lfs.go
new file mode 100644
index 000000000..410ed5a9a
--- /dev/null
+++ b/modules/doctor/lfs.go
@@ -0,0 +1,38 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package doctor
+
+import (
+	"context"
+	"fmt"
+
+	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/services/repository"
+)
+
+func init() {
+	Register(&Check{
+		Title:                      "Garbage collect LFS",
+		Name:                       "gc-lfs",
+		IsDefault:                  false,
+		Run:                        garbageCollectLFSCheck,
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+}
+
+// garbageCollectLFSCheck garbage collects LFS meta objects for all repositories and then runs the LFS storage check.
+func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error {
+	if !setting.LFS.StartServer {
+		return fmt.Errorf("LFS support is disabled")
+	}
+
+	if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
+		return err
+	}
+
+	return checkStorage(&checkStorageOptions{LFS: true})(ctx, logger, autofix)
+}
diff --git a/services/cron/tasks_basic.go b/services/cron/tasks_basic.go
index acf3896b7..05aef6623 100644
--- a/services/cron/tasks_basic.go
+++ b/services/cron/tasks_basic.go
@@ -63,7 +63,7 @@ func registerRepoHealthCheck() {
 		for _, arg := range rhcConfig.Args {
 			args = append(args, git.CmdArg(arg))
 		}
-		return repo_service.GitFsck(ctx, rhcConfig.Timeout, args)
+		return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args)
 	})
 }
 
diff --git a/services/repository/check.go b/services/repository/check.go
index 6e29dc93d..293cb04d3 100644
--- a/services/repository/check.go
+++ b/services/repository/check.go
@@ -22,8 +22,8 @@ import (
 	"xorm.io/builder"
 )
 
-// GitFsck calls 'git fsck' to check repository health.
-func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
+// GitFsckRepos calls 'git fsck' to check repository health.
+func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
 	log.Trace("Doing: GitFsck")
 
 	if err := db.Iterate(
@@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
 				return db.ErrCancelledf("before fsck of %s", repo.FullName())
 			default:
 			}
-			log.Trace("Running health check on repository %v", repo)
-			repoPath := repo.RepoPath()
-			if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
-				log.Warn("Failed to health check repository (%v): %v", repo, err)
-				if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
-					log.Error("CreateRepositoryNotice: %v", err)
-				}
-			}
-			return nil
+			return GitFsckRepo(ctx, repo, timeout, args)
 		},
 	); err != nil {
 		log.Trace("Error: GitFsck: %v", err)
@@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
 	return nil
 }
 
+// GitFsckRepo calls 'git fsck' to check an individual repository's health.
+func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
+	log.Trace("Running health check on repository %-v", repo)
+	repoPath := repo.RepoPath()
+	if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
+		log.Warn("Failed to health check repository (%-v): %v", repo, err)
+		if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
+			log.Error("CreateRepositoryNotice: %v", err)
+		}
+	}
+	return nil
+}
+
 // GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
 func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
 	log.Trace("Doing: GitGcRepos")
@@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
 				return db.ErrCancelledf("before GC of %s", repo.FullName())
 			default:
 			}
-			log.Trace("Running git gc on %v", repo)
-			command := git.NewCommand(ctx, args...).
-				SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
-			var stdout string
-			var err error
-			stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
-
-			if err != nil {
-				log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
-				desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
-				if err = system_model.CreateRepositoryNotice(desc); err != nil {
-					log.Error("CreateRepositoryNotice: %v", err)
-				}
-				return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
-			}
-
-			// Now update the size of the repository
-			if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
-				log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
-				desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
-				if err = system_model.CreateRepositoryNotice(desc); err != nil {
-					log.Error("CreateRepositoryNotice: %v", err)
-				}
-				return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
-			}
-
-			return nil
+			return GitGcRepo(ctx, repo, timeout, args)
 		},
 	); err != nil {
 		return err
@@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
 	return nil
 }
 
+// GitGcRepo calls 'git gc' to remove unnecessary files and optimize the local repository
+func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
+	log.Trace("Running git gc on %-v", repo)
+	command := git.NewCommand(ctx, args...).
+		SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
+	var stdout string
+	var err error
+	stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
+
+	if err != nil {
+		log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
+		desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
+		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil { // do not clobber err: it is wrapped below
+			log.Error("CreateRepositoryNotice: %v", noticeErr)
+		}
+		return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
+	}
+
+	// Now update the size of the repository
+	if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
+		log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
+		desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
+		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil { // do not clobber err: it is wrapped below
+			log.Error("CreateRepositoryNotice: %v", noticeErr)
+		}
+		return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
+	}
+
+	return nil
+}
+
 func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
 	repos := make([]*repo_model.Repository, 0, 10)
 	if err := db.Iterate(
diff --git a/services/repository/lfs.go b/services/repository/lfs.go
new file mode 100644
index 000000000..0e88d359a
--- /dev/null
+++ b/services/repository/lfs.go
@@ -0,0 +1,110 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package repository
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"code.gitea.io/gitea/models/db"
+	git_model "code.gitea.io/gitea/models/git"
+	repo_model "code.gitea.io/gitea/models/repo"
+	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/lfs"
+	"code.gitea.io/gitea/modules/log"
+
+	"xorm.io/builder"
+)
+
+// GarbageCollectLFSMetaObjects iterates over every repository and garbage collects its LFS meta objects.
+// When autofix is false orphaned meta objects are only counted and reported to logger (which may be nil).
+func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
+	log.Trace("Doing: GarbageCollectLFSMetaObjects")
+
+	if err := db.Iterate(
+		ctx,
+		builder.And(builder.Gt{"id": 0}),
+		func(ctx context.Context, repo *repo_model.Repository) error {
+			return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
+		},
+	); err != nil {
+		return err
+	}
+
+	log.Trace("Finished: GarbageCollectLFSMetaObjects")
+	return nil
+}
+
+// GarbageCollectLFSMetaObjectsForRepo checks the LFS meta objects of repo for ones whose pointer file is no longer
+// present in the git repository and, if autofix is set, removes them.
+func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
+	if logger != nil {
+		logger.Info("Checking %-v", repo)
+	}
+	total, orphaned, collected, deleted := 0, 0, 0, 0
+	if logger != nil {
+		defer func() {
+			if orphaned == 0 {
+				logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
+			} else if !autofix {
+				logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
+			} else {
+				logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
+			}
+		}()
+	}
+
+	gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
+	if err != nil {
+		log.Error("Unable to open git repository %-v: %v", repo, err)
+		return err
+	}
+	defer gitRepo.Close()
+
+	store := lfs.NewContentStore()
+
+	return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
+		total++
+		pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
+
+		if gitRepo.IsObjectExist(pointerSha.String()) {
+			return nil
+		}
+		orphaned++
+
+		if !autofix {
+			return nil
+		}
+		// Non-existent pointer file
+		_, err = git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error {
+			if count > 0 {
+				return nil
+			}
+
+			if err := store.Delete(metaObject.RelativePath()); err != nil {
+				log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
+				return nil
+			}
+			deleted++
+			return nil
+		})
+		if err != nil {
+			return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
+		}
+		collected++
+
+		return nil
+	}, &git_model.IterateLFSMetaObjectsForRepoOptions{
+		// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
+		// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
+		// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
+		// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
+		// objects.
+		//
+		// It is likely that a week is potentially excessive but it should definitely be enough that any
+		// unassociated LFS object is genuinely unassociated.
+		OlderThan: time.Now().Add(-24 * 7 * time.Hour),
+	})
+}