Don't remove all mirror repository's releases when mirroring (#28817)

Fix #22066

# Purpose

This PR fixes releases being deleted when a mirror repository syncs its
tags.

# The problem

In the previous implementation (#19125), all release records in the
database for a mirror repository were deleted before each sync.
Ref:
https://github.com/go-gitea/gitea/pull/19125/files#diff-2aa04998a791c30e5a02b49a97c07fcd93d50e8b31640ce2ddb1afeebf605d02R481

# The Pros

This PR introduces a new method which loads all releases from the
database and all tags from the git data into memory, and then detects
which tags need to be inserted, updated, or deleted. Only tag releases
(IsTag=true) that are not present in the git data will be deleted, and
only tags whose sha1 has changed will be updated. As a result, it will
not delete any real releases, including drafts.

# The Cons

The drawback is that memory usage will be higher than before if the
repository has many tags. To limit this, the PR defines a special release
struct that reduces the columns loaded from the database into memory.
This commit is contained in:
Lunny Xiao 2024-01-26 14:18:19 +08:00 committed by GitHub
parent ba24e0ba61
commit 534917d576
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 146 additions and 6 deletions

View File

@ -508,6 +508,18 @@ func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Re
return nil return nil
} }
// shortRelease is a trimmed-down stand-in for repo_model.Release. It carries
// only the fields needed while synchronizing a pull-mirror's tags, so fewer
// columns have to be loaded from the database into memory.
type shortRelease struct {
	ID      int64  // row id; collected by calcSync for rows that must be deleted
	TagName string // tag name, matched against the git tag's Name
	Sha1    string // stored commit id, compared with the git tag's object id
	IsTag   bool   // true for tag-only rows; rows with false are never deleted by calcSync
}

// TableName returns the table name "release".
// NOTE(review): presumably consumed by the ORM so that this struct is mapped
// onto the existing release table — confirm against the db package.
func (shortRelease) TableName() string { return "release" }
// pullMirrorReleaseSync is a pull-mirror specific tag<->release table // pullMirrorReleaseSync is a pull-mirror specific tag<->release table
// synchronization which overwrites all Releases from the repository tags. This // synchronization which overwrites all Releases from the repository tags. This
// can be relied on since a pull-mirror is always identical to its // can be relied on since a pull-mirror is always identical to its
@ -521,16 +533,20 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git
return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
} }
err = db.WithTx(ctx, func(ctx context.Context) error { err = db.WithTx(ctx, func(ctx context.Context) error {
// dbReleases, err := db.Find[shortRelease](ctx, repo_model.FindReleasesOptions{
// clear out existing releases RepoID: repo.ID,
// IncludeDrafts: true,
if _, err := db.DeleteByBean(ctx, &repo_model.Release{RepoID: repo.ID}); err != nil { IncludeTags: true,
return fmt.Errorf("unable to clear releases for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) })
if err != nil {
return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
} }
inserts, deletes, updates := calcSync(tags, dbReleases)
// //
// make release set identical to upstream tags // make release set identical to upstream tags
// //
for _, tag := range tags { for _, tag := range inserts {
release := repo_model.Release{ release := repo_model.Release{
RepoID: repo.ID, RepoID: repo.ID,
TagName: tag.Name, TagName: tag.Name,
@ -547,6 +563,25 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git
return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err)
} }
} }
// only delete tags releases
if len(deletes) > 0 {
if _, err := db.GetEngine(ctx).Where("repo_id=?", repo.ID).
In("id", deletes).
Delete(&repo_model.Release{}); err != nil {
return fmt.Errorf("unable to delete tags for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
}
}
for _, tag := range updates {
if _, err := db.GetEngine(ctx).Where("repo_id = ? AND lower_tag_name = ?", repo.ID, strings.ToLower(tag.Name)).
Cols("sha1").
Update(&repo_model.Release{
Sha1: tag.Object.String(),
}); err != nil {
return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err)
}
}
return nil return nil
}) })
if err != nil { if err != nil {
@ -556,3 +591,32 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git
log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags)
return nil return nil
} }
// calcSync compares the tags found in git (destTags) with the release rows
// stored in the database (dbTags) and works out what has to change so that the
// database matches the git data.
//
// It returns, in this order:
//   - the git tags that have no database row with the same name (to insert),
//   - the IDs of database rows that have IsTag set and whose tag name no
//     longer exists in git (to delete),
//   - the git tags whose database row stores a different Sha1 (to update).
//
// Rows with IsTag == false are never put in the delete list, even when their
// tag is gone from git. Names are matched by exact string equality.
func calcSync(destTags []*git.Tag, dbTags []*shortRelease) ([]*git.Tag, []int64, []*git.Tag) {
	// Build name-keyed lookups for both sides; a later entry with the same
	// name overwrites an earlier one.
	gitByName := make(map[string]*git.Tag)
	for _, gitTag := range destTags {
		gitByName[gitTag.Name] = gitTag
	}
	relByName := make(map[string]*shortRelease)
	for _, dbRel := range dbTags {
		relByName[dbRel.TagName] = dbRel
	}

	var (
		toInsert = make([]*git.Tag, 0, 10)
		toUpdate = make([]*git.Tag, 0, 10)
		toDelete = make([]int64, 0, 10)
	)

	// Walk the git side: unknown names are inserts, known names with a
	// different commit id are updates, everything else is left alone.
	for _, gitTag := range destTags {
		existing := relByName[gitTag.Name]
		switch {
		case existing == nil:
			toInsert = append(toInsert, gitTag)
		case existing.Sha1 != gitTag.Object.String():
			toUpdate = append(toUpdate, gitTag)
		}
	}

	// Walk the database side: only tag-only rows whose name vanished from git
	// are scheduled for deletion.
	for _, dbRel := range dbTags {
		if !dbRel.IsTag {
			continue
		}
		if gitByName[dbRel.TagName] != nil {
			continue
		}
		toDelete = append(toDelete, dbRel.ID)
	}

	return toInsert, toDelete, toUpdate
}

View File

@ -0,0 +1,76 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repository
import (
"testing"
"code.gitea.io/gitea/modules/git"
"github.com/stretchr/testify/assert"
)
// Test_calcSync checks that calcSync sorts a mixed set of git tags and
// database rows into the expected insert, delete and update lists.
func Test_calcSync(t *testing.T) {
	// Tags as they currently exist in git. The database additionally knows
	// "v0.1.0-beta" and "v0.1.1-beta", which are deliberately absent here to
	// simulate tags that were deleted upstream.
	var (
		// keep as before
		unchangedTag = &git.Tag{
			Name:   "v1.0.0",
			Object: git.MustIDFromString("1006e6e13c73ad3d9e2d5682ad266b5016523485"),
		}
		// retagged with new commit id
		retaggedTag = &git.Tag{
			Name:   "v1.1.0",
			Object: git.MustIDFromString("bbdb7df30248e7d4a26a909c8d2598a152e13868"),
		}
		// new tag
		newTag = &git.Tag{
			Name:   "v1.2.0",
			Object: git.MustIDFromString("a5147145e2f24d89fd6d2a87826384cc1d253267"),
		}
	)
	gitTags := []*git.Tag{unchangedTag, retaggedTag, newTag}

	dbReleases := []*shortRelease{
		// deleted tag: tag-only row, expected in the delete list
		{ID: 1, TagName: "v0.1.0-beta", Sha1: "244758d7da8dd1d9e0727e8cb7704ed4ba9a17c3", IsTag: true},
		// deleted tag, but the row is a real release and must be kept
		{ID: 2, TagName: "v0.1.1-beta", Sha1: "244758d7da8dd1d9e0727e8cb7704ed4ba9a17c3", IsTag: false},
		// same commit id as in git: untouched
		{ID: 3, TagName: "v1.0.0", Sha1: "1006e6e13c73ad3d9e2d5682ad266b5016523485"},
		// commit id differs from git: expected in the update list
		{ID: 4, TagName: "v1.1.0", Sha1: "53ab18dcecf4152b58328d1f47429510eb414d50"},
	}

	inserts, deletes, updates := calcSync(gitTags, dbReleases)

	// Only dereference the first element once the length check has passed.
	if assert.EqualValues(t, 1, len(inserts), "inserts") {
		assert.EqualValues(t, *newTag, *inserts[0], "inserts equal")
	}
	if assert.EqualValues(t, 1, len(deletes), "deletes") {
		assert.EqualValues(t, 1, deletes[0], "deletes equal")
	}
	if assert.EqualValues(t, 1, len(updates), "updates") {
		assert.EqualValues(t, *retaggedTag, *updates[0], "updates equal")
	}
}