WIP: feat: cron job to rm remote actors (persons) without following in local instance #8

Draft
realaravinth wants to merge 7 commits from task-602 into forgejo
11 changed files with 615 additions and 33 deletions

View file

@ -0,0 +1,187 @@
package federation
import (
"context"
"strings"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"xorm.io/builder"
)
// FederatedHost is one row of the federated_host table: a remote host,
// identified by its fully qualified domain name, that federated users are
// attached to through FederatedUser.FederationHostID.
type FederatedHost struct {
	ID int64 `xorm:"pk autoincr"`
	// IsBlocked is exported so that xorm maps it to a column; an unexported
	// field is skipped by the mapper and would never be persisted.
	// NOTE(review): presumably marks hosts that must not be federated with — confirm.
	IsBlocked bool
	// HostFqdn is the host's fully qualified domain name.
	HostFqdn string `xorm:"UNIQUE(s) INDEX"`
}
// GetFederatdHost loads the federated_host row whose host_fqdn equals hostFqdn.
//
// It returns db.ErrNotExist when no such row exists, instead of handing back
// an empty record with a nil error, and passes through any engine error.
func GetFederatdHost(ctx context.Context, hostFqdn string) (*FederatedHost, error) {
	rec := new(FederatedHost)
	has, err := db.GetEngine(ctx).
		Table("federated_host").Where("host_fqdn = ?", hostFqdn).Get(rec)
	if err != nil {
		return nil, err
	}
	if !has {
		// Get reports "no row" through its boolean result, not through err.
		return nil, db.ErrNotExist{Resource: "federated_host"}
	}
	return rec, nil
}
// FederatedHostExists reports whether the federated_host table contains a row
// whose host_fqdn equals hostFqdn. On an engine error it returns false together
// with that error.
func FederatedHostExists(ctx context.Context, hostFqdn string) (bool, error) {
	var host FederatedHost
	query := db.GetEngine(ctx).Table("federated_host").Where("host_fqdn = ?", hostFqdn)
	found, err := query.Get(&host)
	if err != nil {
		return false, err
	}
	return found, nil
}
// Save inserts host through the engine bound to ctx and returns the insert
// error, if any.
func (host *FederatedHost) Save(ctx context.Context) error {
	if _, err := db.GetEngine(ctx).Insert(host); err != nil {
		return err
	}
	return nil
}
// FederatedUser ties a user row to a FederatedHost row.
// Rows are created by CreateFederatedUser.
type FederatedUser struct {
ID int64 `xorm:"pk autoincr"`
// UserID is the ID of the user record; CreateFederatedUser copies it from user.User.ID.
UserID int64 `xorm:"INDEX"`
// ExternalID is filled by CreateFederatedUser with the user's Name.
ExternalID string `xorm:"UNIQUE(s) INDEX"`
// FederationHostID is the ID of the FederatedHost passed to CreateFederatedUser.
FederationHostID int64 `xorm:"INDEX"`
}
// CreateFederatedUser inserts a FederatedUser row linking u to host.
//
// The row takes its UserID from u.ID, its ExternalID from u.Name and its
// FederationHostID from host.ID. The user record itself is not inserted here.
func CreateFederatedUser(ctx context.Context, u *user.User, host *FederatedHost) error {
	link := &FederatedUser{
		UserID:           u.ID,
		ExternalID:       u.Name,
		FederationHostID: host.ID,
	}
	_, err := db.GetEngine(ctx).Insert(link)
	return err
}
// CreatUser validates u, fills in instance defaults and stores it together
// with its primary e-mail address inside a single database transaction.
//
// It fails with user.ErrUserAlreadyExist when the name is taken and with
// user.ErrEmailAlreadyUsed when the e-mail address is already in use. When
// u.CreatedUnix is non-zero the caller-supplied timestamps are kept; otherwise
// the engine's automatic timestamps apply.
func CreatUser(ctx context.Context, u *user.User) error {
	// Instance-wide defaults.
	u.Visibility = setting.Service.DefaultUserVisibilityMode
	u.AllowCreateOrganization = setting.Service.DefaultAllowCreateOrganization && !setting.Admin.DisableRegularOrgCreation
	u.EmailNotificationsPreference = setting.Admin.DefaultEmailNotification
	u.MaxRepoCreation = -1
	u.Theme = setting.UI.DefaultTheme
	u.IsRestricted = setting.Service.DefaultUserIsRestricted
	u.IsActive = !setting.Service.RegisterEmailConfirm && !setting.Service.RegisterManualConfirm

	// The update timestamp must never precede the creation timestamp.
	if u.UpdatedUnix < u.CreatedUnix {
		u.UpdatedUnix = u.CreatedUnix
	}

	// Validation happens before the transaction is opened.
	if err := user.ValidateUser(u); err != nil {
		return err
	}
	if err := user.ValidateEmail(u.Email); err != nil {
		return err
	}

	txCtx, committer, err := db.TxContext(ctx)
	if err != nil {
		return err
	}
	defer committer.Close()

	nameTaken, err := user.IsUserExist(txCtx, 0, u.Name)
	if err != nil {
		return err
	}
	if nameTaken {
		return user.ErrUserAlreadyExist{u.Name}
	}

	emailTaken, err := user.IsEmailUsed(txCtx, u.Email)
	if err != nil {
		return err
	}
	if emailTaken {
		return user.ErrEmailAlreadyUsed{
			Email: u.Email,
		}
	}

	// Derived fields.
	u.LowerName = strings.ToLower(u.Name)
	u.AvatarEmail = u.Email
	u.Rands, err = user.GetUserSalt()
	if err != nil {
		return err
	}
	if u.Passwd == "" {
		// No password supplied: leave the credential fields blank.
		u.Salt = ""
		u.PasswdHashAlgo = ""
	} else if err = u.SetPassword(u.Passwd); err != nil {
		return err
	}

	// Drop any redirect registered under the name about to be inserted.
	if err = user.DeleteUserRedirect(txCtx, u.Name); err != nil {
		return err
	}

	if u.CreatedUnix != 0 {
		// The caller set the timestamps and is responsible for both
		// CreatedUnix and UpdatedUnix being consistent.
		_, err = db.GetEngine(txCtx).NoAutoTime().Insert(u)
	} else {
		// The caller expects automatic creation and update timestamps.
		err = db.Insert(txCtx, u)
	}
	if err != nil {
		return err
	}

	// Primary e-mail address record.
	primary := &user.EmailAddress{
		UID:         u.ID,
		Email:       u.Email,
		LowerEmail:  strings.ToLower(u.Email),
		IsActivated: u.IsActive,
		IsPrimary:   true,
	}
	if err := db.Insert(txCtx, primary); err != nil {
		return err
	}

	return committer.Commit()
}
// GetRemoteUsersWithNoLocalFollowers returns one page (40 rows) of users that
// have a federated_user row, a num_followers of 0 and a created_unix earlier
// than now minus olderThan.
//
// page is zero-based; negative values are treated as 0. Rows are ordered by
// user ID so that consecutive pages are stable.
func GetRemoteUsersWithNoLocalFollowers(ctx context.Context, olderThan time.Duration, page int) ([]user.User, error) {
	const pageSize = 40
	if page < 0 {
		page = 0
	}
	cutoff := time.Now().Add(-olderThan).Unix()

	var users []user.User
	err := db.GetEngine(ctx).
		Table("user").
		// Select only the user columns: both joined tables have an `id`
		// column and the federated_user one must not end up in User.ID.
		Select("`user`.*").
		// `user` is a reserved word on some databases, so it is always quoted.
		Join("INNER", "federated_user", "federated_user.user_id = `user`.id").
		Where("`user`.num_followers = 0").
		And(builder.Lt{"`user`.created_unix": cutoff}).
		OrderBy("`user`.id").
		Limit(pageSize, page*pageSize).
		Find(&users)
	if err != nil {
		// %w is only understood by fmt.Errorf; a log format needs %v.
		log.Trace("Error: GetRemoteUsersWithNoLocalFollowers: %v", err)
		return nil, err
	}
	return users, nil
}

View file

@ -50,6 +50,10 @@ var migrations = []*Migration{
NewMigration("create the forgejo_repo_flag table", forgejo_v1_22.CreateRepoFlagTable),
// v5 -> v6
NewMigration("Add wiki_branch to repository", forgejo_v1_22.AddWikiBranchToRepository),
// v6 -> v7
NewMigration("create federated_host table", forgejo_v1_22.AddFederatedHost),
// v7 -> v8
NewMigration("create federated_user table", forgejo_v1_22.AddFederatedUser),
}
// GetCurrentDBVersion returns the current Forgejo database version.
@ -118,6 +122,7 @@ func Migrate(x *xorm.Engine) error {
}
v := currentVersion.Version
log.Info("Current version: %d", v)
// Downgrading Forgejo's database version not supported
if v > ExpectedVersion() {
@ -156,5 +161,6 @@ func Migrate(x *xorm.Engine) error {
return fmt.Errorf("sync: %w", err)
}
// panic("fn end")
return semver.SetVersionStringWithEngine(x, setting.ForgejoVersion)
}

View file

@ -0,0 +1,26 @@
// Copyright 2024 The Forgejo Authors
// SPDX-License-Identifier: AGPL-3.0-or-later
package v1_22 //nolint
import (
"code.gitea.io/gitea/models/federation"
"code.gitea.io/gitea/modules/log"
"xorm.io/xorm"
)
//// FederatedHost mirrors the fields of models/federation.FederatedHost.
//// The copy below is commented out: AddFederatedHost currently syncs the
//// live model type instead of this snapshot.
//type FederatedHost struct {
// ID int64 `xorm:"pk autoincr"`
// isBlocked bool
// HostFqdn string `xorm:"UNIQUE(s) INDEX"`
//}
// AddFederatedHost is the migration step that synchronises the database
// schema with federation.FederatedHost.
func AddFederatedHost(x *xorm.Engine) error {
	log.Info("Running Add host migration")
	model := new(federation.FederatedHost)
	return x.Sync(model)
}

View file

@ -0,0 +1,25 @@
// Copyright 2024 The Forgejo Authors
// SPDX-License-Identifier: AGPL-3.0-or-later
package v1_22 //nolint
import (
"code.gitea.io/gitea/models/federation"
"code.gitea.io/gitea/modules/log"
"xorm.io/xorm"
)
// FederatedUser mirrors the fields of models/federation.FederatedUser.
// The copy below is commented out: AddFederatedUser currently syncs the
// live model type instead of this snapshot.
//type FederatedUser struct {
// ID int64 `xorm:"pk autoincr"`
// UserID int64 `xorm:"INDEX"`
// ExternalID string `xorm:"UNIQUE(s) INDEX"`
// FederationHostID int64 `xorm:"INDEX"`
//}
// AddFederatedUser is the migration step that synchronises the database
// schema with federation.FederatedUser.
func AddFederatedUser(x *xorm.Engine) error {
	log.Info("Running Add user migration")
	model := new(federation.FederatedUser)
	return x.Sync(model)
}

View file

@ -2921,6 +2921,7 @@ dashboard.start_schedule_tasks = Start schedule tasks
dashboard.sync_branch.started = Branches Sync started
dashboard.sync_tag.started = Tags Sync started
dashboard.rebuild_issue_indexer = Rebuild issue indexer
dashboard.remote_actor_cleanup = Clean remote actors with no local followers
users.user_manage_panel = Manage user accounts
users.new_account = Create User Account

View file

@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/sitemap"
"code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/forgefed"
)
const (
@ -99,6 +100,31 @@ func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions,
return
}
}
if len(opts.Keyword) > 0 && forgefed.IsFingerable(opts.Keyword) {
webfingerRes, err := forgefed.WebFingerLookup(opts.Keyword)
if err != nil {
ctx.ServerError("SearchUsers", err)
return
}
person, err := forgefed.GetActor(webfingerRes.GetActorLink().Href)
if err != nil {
ctx.ServerError("SearchUsers", err)
return
}
_, err = forgefed.SavePerson(ctx, person)
if err != nil {
ctx.ServerError("SearchUsers", err)
return
}
// users, count, err = user_model.SearchUsers(ctx, opts)
// if err != nil {
// ctx.ServerError("SearchUsers", err)
// return
// }
}
if isSitemap {
m := sitemap.NewSitemap()
for _, item := range users {

View file

@ -13,25 +13,11 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/forgefed"
)
// https://datatracker.ietf.org/doc/html/draft-ietf-appsawg-webfinger-14#section-4.4
type webfingerJRD struct {
Subject string `json:"subject,omitempty"`
Aliases []string `json:"aliases,omitempty"`
Properties map[string]any `json:"properties,omitempty"`
Links []*webfingerLink `json:"links,omitempty"`
}
type webfingerLink struct {
Rel string `json:"rel,omitempty"`
Type string `json:"type,omitempty"`
Href string `json:"href,omitempty"`
Titles map[string]string `json:"titles,omitempty"`
Properties map[string]any `json:"properties,omitempty"`
}
// WebfingerQuery returns information about a resource
// https://datatracker.ietf.org/doc/html/rfc7565
func WebfingerQuery(ctx *context.Context) {
@ -64,6 +50,19 @@ func WebfingerQuery(ctx *context.Context) {
if u != nil && u.KeepEmailPrivate {
err = user_model.ErrUserNotExist{}
}
case "https", "http":
if resource.Host != appURL.Host {
ctx.Error(http.StatusBadRequest)
return
}
parts := strings.Split(resource.Path, "/")
if len(parts) < 2 { // fragment[0] is empty space, fragment[1] may be username
ctx.Error(http.StatusBadRequest)
return
}
u, err = user_model.GetUserByName(ctx, parts[1])
default:
ctx.Error(http.StatusBadRequest)
return
@ -91,7 +90,7 @@ func WebfingerQuery(ctx *context.Context) {
aliases = append(aliases, fmt.Sprintf("mailto:%s", u.Email))
}
links := []*webfingerLink{
links := []*forgefed.WebfingerLink{
{
Rel: "http://webfinger.net/rel/profile-page",
Type: "text/html",
@ -112,8 +111,9 @@ func WebfingerQuery(ctx *context.Context) {
},
}
ctx.Resp.Header().Add("Content-Type", "application/jrd+json")
ctx.Resp.Header().Add("Access-Control-Allow-Origin", "*")
ctx.JSON(http.StatusOK, &webfingerJRD{
ctx.JSON(http.StatusOK, &forgefed.WebfingerJRD{
Subject: fmt.Sprintf("acct:%s@%s", url.QueryEscape(u.Name), appURL.Host),
Aliases: aliases,
Links: links,

View file

@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/actions"
"code.gitea.io/gitea/services/auth"
forgefed_service "code.gitea.io/gitea/services/forgefed"
"code.gitea.io/gitea/services/migrations"
mirror_service "code.gitea.io/gitea/services/mirror"
packages_cleanup_service "code.gitea.io/gitea/services/packages/cleanup"
@ -187,7 +188,27 @@ func initBasicTasks() {
if setting.Packages.Enabled {
registerCleanupPackages()
}
if setting.Actions.Enabled {
registerActionsCleanup()
}
if setting.Federation.Enabled {
registerCleanupRemotePersonsWithNoFollowers()
}
}
func registerCleanupRemotePersonsWithNoFollowers() {
RegisterTaskFatal("remote_actor_cleanup", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
},
OlderThan: 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
acConfig := config.(*OlderThanConfig)
return forgefed_service.CleanUpRemotePersons(ctx, acConfig.OlderThan)
})
}

124
services/forgefed/actor.go Normal file
View file

@ -0,0 +1,124 @@
package forgefed
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"code.gitea.io/gitea/models/federation"
"code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/log"
user_service "code.gitea.io/gitea/services/user"
ap "github.com/go-ap/activitypub"
)
// GetActor fetches the document published at id and decodes it as an
// ActivityPub actor.
//
// id is used verbatim as the request URL. An error is returned when the
// request cannot be built or sent, when the server answers with anything other
// than 200 OK, or when the body is not valid actor JSON.
func GetActor(id string) (*ap.Actor, error) {
	// Upper bound on how much of a remote response is read into memory.
	const maxActorBodySize = 1 << 20

	req, err := http.NewRequest(http.MethodGet, id, nil)
	if err != nil {
		// Previously ignored, which led to a nil dereference on req below.
		return nil, fmt.Errorf("building actor request for %q: %w", id, err)
	}
	// A GET carries no body, so content negotiation goes through Accept.
	req.Header.Set("Accept", `application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"`)

	// Never wait forever on a remote server.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching actor %q: unexpected status %s", id, resp.Status)
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, maxActorBodySize))
	if err != nil {
		return nil, err
	}

	actorObj := new(ap.Actor)
	if err := json.Unmarshal(body, actorObj); err != nil {
		return nil, fmt.Errorf("decoding actor %q: %w", id, err)
	}
	return actorObj, nil
}
// SavePerson stores a remote ActivityPub person as a local user record.
//
// The host part of person.ID is looked up in (or added to) the federated_host
// table. A user named "@<preferredUsername>@<host>" is then created through
// federation.CreatUser and linked to that host with
// federation.CreateFederatedUser. If a matching user already exists it is
// returned unchanged.
//
// The user insert and the federated-user insert are separate operations; a
// failure of the second one leaves the user row in place.
func SavePerson(ctx context.Context, person *ap.Person) (*user.User, error) {
	if person == nil {
		return nil, fmt.Errorf("SavePerson: person is nil")
	}

	actorID := person.ID.String()
	hostname, err := GetHostnameFromResource(actorID)
	if err != nil {
		// This error used to be overwritten by the next call without a check.
		return nil, fmt.Errorf("resolving host of actor %q: %w", actorID, err)
	}

	exists, err := federation.FederatedHostExists(ctx, hostname)
	if err != nil {
		return nil, err
	}

	// One variable for both branches: declaring a new one inside the else
	// branch would shadow it and lose the freshly inserted host's ID.
	var federatedHost *federation.FederatedHost
	if exists {
		federatedHost, err = federation.GetFederatdHost(ctx, hostname)
		if err != nil {
			return nil, err
		}
	} else {
		federatedHost = &federation.FederatedHost{HostFqdn: hostname}
		if err = federatedHost.Save(ctx); err != nil {
			return nil, err
		}
	}

	username := person.PreferredUsername.String()
	u := new(user.User)
	u.Name = "@" + username + "@" + hostname
	u.Email = username + "@" + hostname
	if person.URL != nil {
		// The URL is optional; calling GetID on a nil item would panic.
		u.Website = person.URL.GetID().String()
	}
	u.KeepEmailPrivate = true

	exist, err := user.GetUser(ctx, u)
	if err != nil {
		return nil, err
	}
	if exist {
		return u, nil // TODO: must also check for federatedUser existence
	}

	if err = federation.CreatUser(ctx, u); err != nil {
		return nil, err
	}
	if err = federation.CreateFederatedUser(ctx, u, federatedHost); err != nil {
		return nil, err
	}
	return u, nil
}
// CleanUpRemotePersons deletes remote actors (persons) that have no followers
// on the local instance and were created more than olderThan ago.
//
// Candidates come from federation.GetRemoteUsersWithNoLocalFollowers and are
// removed with user_service.DeleteUser (purge disabled). The first query or
// delete error stops the run and is returned; cancellation of ctx is honoured
// between batches.
func CleanUpRemotePersons(ctx context.Context, olderThan time.Duration) error {
	var prevFirstID int64
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Always ask for the first page: deleted users drop out of the result
		// set, so advancing the page number would skip as many rows as were
		// just removed.
		users, err := federation.GetRemoteUsersWithNoLocalFollowers(ctx, olderThan, 0)
		if err != nil {
			// Check the error before the length: a failed query also yields
			// an empty slice and must not be mistaken for "nothing to do".
			log.Trace("Error: CleanUpRemotePersons: %v", err)
			return err
		}
		if len(users) == 0 {
			return nil
		}

		// Seeing the same first row twice means deletions are not taking
		// effect; bail out rather than loop forever.
		if users[0].ID == prevFirstID {
			return fmt.Errorf("CleanUpRemotePersons: user %d still present after deletion", prevFirstID)
		}
		prevFirstID = users[0].ID

		for i := range users {
			if err := user_service.DeleteUser(ctx, &users[i], false); err != nil {
				log.Trace("Error: CleanUpRemotePersons: %v", err)
				return err
			}
		}
	}
}

View file

@ -0,0 +1,167 @@
package forgefed
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"code.gitea.io/gitea/modules/log"
)
// WebfingerJRD is the JSON Resource Descriptor exchanged with WebFinger
// endpoints.
// https://datatracker.ietf.org/doc/html/draft-ietf-appsawg-webfinger-14#section-4.4
type WebfingerJRD struct {
	Subject    string           `json:"subject,omitempty"`
	Aliases    []string         `json:"aliases,omitempty"`
	Properties map[string]any   `json:"properties,omitempty"`
	Links      []*WebfingerLink `json:"links,omitempty"`
}

// firstLink returns the first non-nil link accepted by match, or nil.
// Nil entries are skipped because a JSON `null` inside a decoded "links"
// array becomes a nil pointer.
func (w WebfingerJRD) firstLink(match func(*WebfingerLink) bool) *WebfingerLink {
	for _, link := range w.Links {
		if link != nil && match(link) {
			return link
		}
	}
	return nil
}

// GetAvatar returns the first link whose rel is
// "http://webfinger.net/rel/avatar", or nil when there is none.
func (w WebfingerJRD) GetAvatar() *WebfingerLink {
	return w.firstLink(func(l *WebfingerLink) bool {
		return l.Rel == "http://webfinger.net/rel/avatar"
	})
}

// GetProfilePage returns the first "text/html" link whose rel is
// "http://webfinger.net/rel/profile-page", or nil when there is none.
func (w WebfingerJRD) GetProfilePage() *WebfingerLink {
	return w.firstLink(func(l *WebfingerLink) bool {
		return l.Rel == "http://webfinger.net/rel/profile-page" && l.Type == "text/html"
	})
}

// GetActorLink returns the first "self" link of type
// "application/activity+json", or nil when there is none.
func (w WebfingerJRD) GetActorLink() *WebfingerLink {
	return w.firstLink(func(l *WebfingerLink) bool {
		return l.Rel == "self" && l.Type == "application/activity+json"
	})
}

// WebfingerLink is a single entry of a JRD "links" array.
type WebfingerLink struct {
	Rel        string            `json:"rel,omitempty"`
	Type       string            `json:"type,omitempty"`
	Href       string            `json:"href,omitempty"`
	Titles     map[string]string `json:"titles,omitempty"`
	Properties map[string]any    `json:"properties,omitempty"`
}
// GetHostnameFromResource extracts the host part from a WebFinger-style
// resource.
//
// Accepted forms, after one optional leading "@" is removed:
//   - "user@host" and "mailto:user@host": the text after the single "@";
//   - "https://host/...": the URL's host.
//
// Any other form, or a resource with an empty host part, yields an error.
func GetHostnameFromResource(resource string) (string, error) {
	original := resource
	resource = strings.TrimPrefix(resource, "@")

	actor, err := url.Parse(resource)
	if err != nil {
		return "", err
	}

	var hostname string
	switch actor.Scheme {
	case "", "mailto":
		parts := strings.Split(resource, "@")
		if len(parts) == 2 {
			hostname = parts[1]
		}
	case "https":
		hostname = actor.Host
	}

	if hostname == "" {
		// The resource is caller-supplied, so it is passed as an argument and
		// never used as the format string itself.
		log.Error("Invalid webfinger query %s", original)
		return "", errors.New("Invalid webfinger query " + original)
	}
	return hostname, nil
}
// WebFingerLookup queries the WebFinger endpoint of the host named in q and
// returns the decoded JRD.
//
// q may be "user@host" (optionally prefixed with "@"), "mailto:user@host" or
// an "https://" URL. A bare "user@host" is sent as an "acct:" resource; the
// other forms are sent unchanged. The request always goes to
// https://<host>/.well-known/webfinger.
//
// An error is returned for unsupported queries, transport failures, any
// response status other than 200 OK, and bodies that are not valid JSON.
func WebFingerLookup(q string) (*WebfingerJRD, error) {
	q = strings.TrimPrefix(q, "@")

	actor, err := url.Parse(q)
	if err != nil {
		return nil, err
	}

	var res string
	switch actor.Scheme {
	case "":
		res = "acct:" + q
	case "mailto", "https":
		res = q
	default:
		return nil, errors.New("Invalid webfinger query")
	}

	hostname, err := GetHostnameFromResource(q)
	if err != nil {
		return nil, err
	}

	// Build the URL from parts so that the resource value is query-escaped.
	endpoint := url.URL{
		Scheme:   "https",
		Host:     hostname,
		Path:     "/.well-known/webfinger",
		RawQuery: url.Values{"resource": {res}}.Encode(),
	}

	r, err := http.Get(endpoint.String())
	if err != nil {
		return nil, err
	}
	defer r.Body.Close()

	// An error page that happens to be JSON would otherwise decode into an
	// empty descriptor and look like a successful lookup.
	if r.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("webfinger lookup for %q: unexpected status %s", res, r.Status)
	}

	webfingerResponse := new(WebfingerJRD)
	if err := json.NewDecoder(r.Body).Decode(webfingerResponse); err != nil {
		return nil, err
	}
	return webfingerResponse, nil
}
// IsFingerable reports whether resource has a shape that a WebFinger lookup
// can be attempted for.
//
// After one optional leading "@" is removed, it accepts "user@host" and
// "mailto:user@host" with exactly one "@" and non-empty user and host parts,
// and "https://" URLs that carry a host. Everything else, including input
// that does not parse as a URL, is rejected.
func IsFingerable(resource string) bool {
	resource = strings.TrimPrefix(resource, "@")

	actor, err := url.Parse(resource)
	if err != nil {
		return false
	}

	switch actor.Scheme {
	case "", "mailto":
		// Strip the scheme so that "mailto:@host" is seen as an empty user.
		parts := strings.Split(strings.TrimPrefix(resource, "mailto:"), "@")
		return len(parts) == 2 && parts[0] != "" && parts[1] != ""
	case "https":
		return actor.Host != ""
	default:
		return false
	}
}

View file

@ -12,6 +12,7 @@ import (
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/forgefed"
"code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert"
@ -29,27 +30,12 @@ func TestWebfinger(t *testing.T) {
appURL, _ := url.Parse(setting.AppURL)
type webfingerLink struct {
Rel string `json:"rel,omitempty"`
Type string `json:"type,omitempty"`
Href string `json:"href,omitempty"`
Titles map[string]string `json:"titles,omitempty"`
Properties map[string]any `json:"properties,omitempty"`
}
type webfingerJRD struct {
Subject string `json:"subject,omitempty"`
Aliases []string `json:"aliases,omitempty"`
Properties map[string]any `json:"properties,omitempty"`
Links []*webfingerLink `json:"links,omitempty"`
}
session := loginUser(t, "user1")
req := NewRequest(t, "GET", fmt.Sprintf("/.well-known/webfinger?resource=acct:%s@%s", user.LowerName, appURL.Host))
resp := MakeRequest(t, req, http.StatusOK)
var jrd webfingerJRD
var jrd forgefed.WebfingerJRD
DecodeJSON(t, resp, &jrd)
assert.Equal(t, "acct:user2@"+appURL.Host, jrd.Subject)
assert.ElementsMatch(t, []string{user.HTMLURL(), appURL.String() + "api/v1/activitypub/user-id/" + fmt.Sprint(user.ID)}, jrd.Aliases)
@ -65,4 +51,17 @@ func TestWebfinger(t *testing.T) {
req = NewRequest(t, "GET", fmt.Sprintf("/.well-known/webfinger?resource=mailto:%s", user.Email))
MakeRequest(t, req, http.StatusNotFound)
req = NewRequest(t, "GET", fmt.Sprintf("/.well-known/webfinger?resource=http://%s/%s/foo", appURL.Host, user.Name))
session.MakeRequest(t, req, http.StatusOK)
req = NewRequest(t, "GET", fmt.Sprintf("/.well-known/webfinger?resource=https://%s/%s", appURL.Host, user.Name))
session.MakeRequest(t, req, http.StatusOK)
req = NewRequest(t, "GET", fmt.Sprintf("/.well-known/webfinger?resource=http://%s", appURL.Host))
MakeRequest(t, req, http.StatusBadRequest)
req = NewRequest(t, "GET", fmt.Sprintf("/.well-known/webfinger?resource=http://%s/%s/foo", "example.com", user.Name))
MakeRequest(t, req, http.StatusBadRequest)
}