From 286d09203f3b77a2dc26c28b278a16953e5ee523 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 6 Mar 2024 16:47:52 +0800 Subject: [PATCH 1/4] Sync branches to DB immediately when handle git hook calling (gitea#29493) Unlike other async processing in the queue, we should sync branches to the DB immediately when handling git hook calling. If it fails, users can see the error message in the output of the git command. It can avoid potential inconsistency issues, and help #29494. --------- Co-authored-by: Lunny Xiao --- models/git/branch.go | 5 + routers/private/hook_post_receive.go | 66 +++++++++++++- services/repository/branch.go | 115 ++++++++++++++++------- services/repository/push.go | 9 -- tests/integration/git_push_test.go | 131 +++++++++++++++++++++++++++ 5 files changed, 282 insertions(+), 44 deletions(-) create mode 100644 tests/integration/git_push_test.go diff --git a/models/git/branch.go b/models/git/branch.go index 6baad65ab4..a5ee2bde66 100644 --- a/models/git/branch.go +++ b/models/git/branch.go @@ -162,6 +162,11 @@ func GetBranch(ctx context.Context, repoID int64, branchName string) (*Branch, e return &branch, nil } +func GetBranches(ctx context.Context, repoID int64, branchNames []string) ([]*Branch, error) { + branches := make([]*Branch, 0, len(branchNames)) + return branches, db.GetEngine(ctx).Where("repo_id=?", repoID).In("name", branchNames).Find(&branches) +} + func AddBranches(ctx context.Context, branches []*Branch) error { for _, branch := range branches { if _, err := db.GetEngine(ctx).Insert(branch); err != nil { diff --git a/routers/private/hook_post_receive.go b/routers/private/hook_post_receive.go index f4c698d3ea..c34c0013d5 100644 --- a/routers/private/hook_post_receive.go +++ b/routers/private/hook_post_receive.go @@ -8,9 +8,11 @@ import ( "net/http" "strconv" + git_model "code.gitea.io/gitea/models/git" issues_model "code.gitea.io/gitea/models/issues" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/private" repo_module "code.gitea.io/gitea/modules/repository" @@ -27,6 +29,7 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { // We don't rely on RepoAssignment here because: // a) we don't need the git repo in this function + // OUT OF DATE: we do need the git repo to sync the branch to the db now. // b) our update function will likely change the repository in the db so we will need to refresh it // c) we don't always need the repo @@ -34,7 +37,11 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { repoName := ctx.Params(":repo") // defer getting the repository at this point - as we should only retrieve it if we're going to call update - var repo *repo_model.Repository + var ( + repo *repo_model.Repository + gitRepo *git.Repository + ) + defer gitRepo.Close() // it's safe to call Close on a nil pointer updates := make([]*repo_module.PushUpdateOptions, 0, len(opts.OldCommitIDs)) wasEmpty := false @@ -87,6 +94,63 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { }) return } + + branchesToSync := make([]*repo_module.PushUpdateOptions, 0, len(updates)) + for _, update := range updates { + if !update.RefFullName.IsBranch() { + continue + } + if repo == nil { + repo = loadRepository(ctx, ownerName, repoName) + if ctx.Written() { + return + } + wasEmpty = repo.IsEmpty + } + + if update.IsDelRef() { + if err := git_model.AddDeletedBranch(ctx, repo.ID, update.RefFullName.BranchName(), update.PusherID); err != nil { + log.Error("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err) + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } + } else { + branchesToSync = append(branchesToSync, update) + } + } + if len(branchesToSync) > 0 { + if gitRepo == nil { + var err error + gitRepo, err = gitrepo.OpenRepository(ctx, repo) + if err != nil { + log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err) + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } + } + + var ( + branchNames = make([]string, 0, len(branchesToSync)) + commitIDs = make([]string, 0, len(branchesToSync)) + ) + for _, update := range branchesToSync { + branchNames = append(branchNames, update.RefFullName.BranchName()) + commitIDs = append(commitIDs, update.NewCommitID) + } + + if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, func(commitID string) (*git.Commit, error) { + return gitRepo.GetCommit(commitID) + }); err != nil { + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } + } } // Handle Push Options diff --git a/services/repository/branch.go b/services/repository/branch.go index d349db7f74..9a0ff443af 100644 --- a/services/repository/branch.go +++ b/services/repository/branch.go @@ -225,44 +225,91 @@ func checkBranchName(ctx context.Context, repo *repo_model.Repository, name stri return err } -// syncBranchToDB sync the branch information in the database. It will try to update the branch first, -// if updated success with affect records > 0, then all are done. Because that means the branch has been in the database. -// If no record is affected, that means the branch does not exist in database. So there are two possibilities. -// One is this is a new branch, then we just need to insert the record. Another is the branches haven't been synced, -// then we need to sync all the branches into database. -func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName string, commit *git.Commit) error { - cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit) - if err != nil { - return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err) - } - if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced. - return nil +// SyncBranchesToDB sync the branch information in the database. +// It will check whether the branches of the repository have never been synced before. +// If so, it will sync all branches of the repository. +// Otherwise, it will sync the branches that need to be updated. +func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, commitIDs []string, getCommit func(commitID string) (*git.Commit, error)) error { + // Some designs that make the code look strange but are made for performance optimization purposes: + // 1. Sync branches in a batch to reduce the number of DB queries. + // 2. Lazy load commit information since it may be not necessary. + // 3. Exit early if synced all branches of git repo when there's no branch in DB. + // 4. Check the branches in DB if they are already synced. + // + // If the user pushes many branches at once, the Git hook will call the internal API in batches, rather than all at once. + // See https://github.com/go-gitea/gitea/blob/cb52b17f92e2d2293f7c003649743464492bca48/cmd/hook.go#L27 + // For the first batch, it will hit optimization 3. + // For other batches, it will hit optimization 4. + + if len(branchNames) != len(commitIDs) { + return fmt.Errorf("branchNames and commitIDs length not match") } - // if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21, - // we cannot simply insert the branch but need to check we have branches or not - hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{ - RepoID: repoID, - IsDeletedBranch: optional.Some(false), - }.ToConds()) - if err != nil { - return err - } - if !hasBranch { - if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil { - return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err) + return db.WithTx(ctx, func(ctx context.Context) error { + branches, err := git_model.GetBranches(ctx, repoID, branchNames) + if err != nil { + return fmt.Errorf("git_model.GetBranches: %v", err) + } + + if len(branches) == 0 { + // if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21, + // we cannot simply insert the branch but need to check we have branches or not + hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{ + RepoID: repoID, + IsDeletedBranch: optional.Some(false), + }.ToConds()) + if err != nil { + return err + } + if !hasBranch { + if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil { + return fmt.Errorf("repo_module.SyncRepoBranches %d failed: %v", repoID, err) + } + return nil + } + } + + branchMap := make(map[string]*git_model.Branch, len(branches)) + for _, branch := range branches { + branchMap[branch.Name] = branch + } + + newBranches := make([]*git_model.Branch, 0, len(branchNames)) + + for i, branchName := range branchNames { + commitID := commitIDs[i] + branch, exist := branchMap[branchName] + if exist && branch.CommitID == commitID { + continue + } + + commit, err := getCommit(branchName) + if err != nil { + return fmt.Errorf("get commit of %s failed: %v", branchName, err) + } + + if exist { + if _, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit); err != nil { + return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err) + } + return nil + } + + // if database have branches but not this branch, it means this is a new branch + newBranches = append(newBranches, &git_model.Branch{ + RepoID: repoID, + Name: branchName, + CommitID: commit.ID.String(), + CommitMessage: commit.Summary(), + PusherID: pusherID, + CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()), + }) + } + + if len(newBranches) > 0 { + return db.Insert(ctx, newBranches) } return nil - } - - // if database have branches but not this branch, it means this is a new branch - return db.Insert(ctx, &git_model.Branch{ - RepoID: repoID, - Name: branchName, - CommitID: commit.ID.String(), - CommitMessage: commit.Summary(), - PusherID: pusherID, - CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()), }) } diff --git a/services/repository/push.go b/services/repository/push.go index 5d10237845..8b27f07196 100644 --- a/services/repository/push.go +++ b/services/repository/push.go @@ -11,7 +11,6 @@ import ( "time" "code.gitea.io/gitea/models/db" - git_model "code.gitea.io/gitea/models/git" repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/cache" @@ -259,10 +258,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error { commits.Commits = commits.Commits[:setting.UI.FeedMaxCommitNum] } - if err = syncBranchToDB(ctx, repo.ID, opts.PusherID, branch, newCommit); err != nil { - return fmt.Errorf("git_model.UpdateBranch %s:%s failed: %v", repo.FullName(), branch, err) - } - notify_service.PushCommits(ctx, pusher, repo, opts, commits) // Cache for big repository @@ -275,10 +270,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error { // close all related pulls log.Error("close related pull request failed: %v", err) } - - if err := git_model.AddDeletedBranch(ctx, repo.ID, branch, pusher.ID); err != nil { - return fmt.Errorf("AddDeletedBranch %s:%s failed: %v", repo.FullName(), branch, err) - } } // Even if user delete a branch on a repository which he didn't watch, he will be watch that. diff --git a/tests/integration/git_push_test.go b/tests/integration/git_push_test.go new file mode 100644 index 0000000000..cb2910b175 --- /dev/null +++ b/tests/integration/git_push_test.go @@ -0,0 +1,131 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package integration + +import ( + "fmt" + "net/url" + "testing" + + "code.gitea.io/gitea/models/db" + git_model "code.gitea.io/gitea/models/git" + "code.gitea.io/gitea/models/unittest" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/git" + repo_service "code.gitea.io/gitea/services/repository" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGitPush(t *testing.T) { + onGiteaRun(t, testGitPush) +} + +func testGitPush(t *testing.T, u *url.URL) { + t.Run("Push branches at once", func(t *testing.T) { + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { + for i := 0; i < 100; i++ { + branchName := fmt.Sprintf("branch-%d", i) + pushed = append(pushed, branchName) + doGitCreateBranch(gitPath, branchName)(t) + } + pushed = append(pushed, "master") + doGitPushTestRepository(gitPath, "origin", "--all")(t) + return pushed, deleted + }) + }) + + t.Run("Push branches one by one", func(t *testing.T) { + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { + for i := 0; i < 100; i++ { + branchName := fmt.Sprintf("branch-%d", i) + doGitCreateBranch(gitPath, branchName)(t) + doGitPushTestRepository(gitPath, "origin", branchName)(t) + pushed = append(pushed, branchName) + } + return pushed, deleted + }) + }) + + t.Run("Delete branches", func(t *testing.T) { + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { + doGitPushTestRepository(gitPath, "origin", "master")(t) // make sure master is the default branch instead of a branch we are going to delete + pushed = append(pushed, "master") + + for i := 0; i < 100; i++ { + branchName := fmt.Sprintf("branch-%d", i) + pushed = append(pushed, branchName) + doGitCreateBranch(gitPath, branchName)(t) + } + doGitPushTestRepository(gitPath, "origin", "--all")(t) + + for i := 0; i < 10; i++ { + branchName := fmt.Sprintf("branch-%d", i) + doGitPushTestRepository(gitPath, "origin", "--delete", branchName)(t) + deleted = append(deleted, branchName) + } + return pushed, deleted + }) + }) +} + +func runTestGitPush(t *testing.T, u *url.URL, gitOperation func(t *testing.T, gitPath string) (pushed, deleted []string)) { + user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) + repo, err := repo_service.CreateRepository(db.DefaultContext, user, user, repo_service.CreateRepoOptions{ + Name: "repo-to-push", + Description: "test git push", + AutoInit: false, + DefaultBranch: "main", + IsPrivate: false, + }) + require.NoError(t, err) + require.NotEmpty(t, repo) + + gitPath := t.TempDir() + + doGitInitTestRepository(gitPath)(t) + + oldPath := u.Path + oldUser := u.User + defer func() { + u.Path = oldPath + u.User = oldUser + }() + u.Path = repo.FullName() + ".git" + u.User = url.UserPassword(user.LowerName, userPassword) + + doGitAddRemote(gitPath, "origin", u)(t) + + gitRepo, err := git.OpenRepository(git.DefaultContext, gitPath) + require.NoError(t, err) + defer gitRepo.Close() + + pushedBranches, deletedBranches := gitOperation(t, gitPath) + + dbBranches := make([]*git_model.Branch, 0) + require.NoError(t, db.GetEngine(db.DefaultContext).Where("repo_id=?", repo.ID).Find(&dbBranches)) + assert.Equalf(t, len(pushedBranches), len(dbBranches), "mismatched number of branches in db") + dbBranchesMap := make(map[string]*git_model.Branch, len(dbBranches)) + for _, branch := range dbBranches { + dbBranchesMap[branch.Name] = branch + } + + deletedBranchesMap := make(map[string]bool, len(deletedBranches)) + for _, branchName := range deletedBranches { + deletedBranchesMap[branchName] = true + } + + for _, branchName := range pushedBranches { + branch, ok := dbBranchesMap[branchName] + deleted := deletedBranchesMap[branchName] + assert.True(t, ok, "branch %s not found in database", branchName) + assert.Equal(t, deleted, branch.IsDeleted, "IsDeleted of %s is %v, but it's expected to be %v", branchName, branch.IsDeleted, deleted) + commitID, err := gitRepo.GetBranchCommitID(branchName) + require.NoError(t, err) + assert.Equal(t, commitID, branch.CommitID) + } + + require.NoError(t, repo_service.DeleteRepositoryDirectly(db.DefaultContext, user, repo.ID)) +} From 0a53eb838d547d3b63ff39be95ca7ca9c9ef2f93 Mon Sep 17 00:00:00 2001 From: oliverpool Date: Mon, 18 Mar 2024 12:26:38 +0100 Subject: [PATCH 2/4] use gitRepo.GetCommit directly and give it a commitID instead of a branchName (a bit more correct and faster) --- routers/private/hook_post_receive.go | 4 +--- services/repository/branch.go | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/routers/private/hook_post_receive.go b/routers/private/hook_post_receive.go index c34c0013d5..ebe5424837 100644 --- a/routers/private/hook_post_receive.go +++ b/routers/private/hook_post_receive.go @@ -142,9 +142,7 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { commitIDs = append(commitIDs, update.NewCommitID) } - if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, func(commitID string) (*git.Commit, error) { - return gitRepo.GetCommit(commitID) - }); err != nil { + if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, gitRepo.GetCommit); err != nil { ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err), }) diff --git a/services/repository/branch.go b/services/repository/branch.go index 9a0ff443af..39506152f1 100644 --- a/services/repository/branch.go +++ b/services/repository/branch.go @@ -283,7 +283,7 @@ func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, continue } - commit, err := getCommit(branchName) + commit, err := getCommit(commitID) if err != nil { return fmt.Errorf("get commit of %s failed: %v", branchName, err) } From 66a135f6f237c4bb26700a0d2cc026b5acce36af Mon Sep 17 00:00:00 2001 From: oliverpool Date: Mon, 18 Mar 2024 12:26:49 +0100 Subject: [PATCH 3/4] [BUG] Restore deleted branches --- services/repository/branch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/repository/branch.go b/services/repository/branch.go index 39506152f1..b683553245 100644 --- a/services/repository/branch.go +++ b/services/repository/branch.go @@ -279,7 +279,7 @@ func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, for i, branchName := range branchNames { commitID := commitIDs[i] branch, exist := branchMap[branchName] - if exist && branch.CommitID == commitID { + if exist && branch.CommitID == commitID && !branch.IsDeleted { continue } From 375222a145802de5926ecbf51eb6808caa8b6bb2 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Mon, 11 Mar 2024 14:42:50 +0800 Subject: [PATCH 4/4] Sync branches first (gitea#29714) Follow gitea#29493. Sync branches to DB first, then trigger push events. --- routers/private/hook_post_receive.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/routers/private/hook_post_receive.go b/routers/private/hook_post_receive.go index ebe5424837..f5527cb15b 100644 --- a/routers/private/hook_post_receive.go +++ b/routers/private/hook_post_receive.go @@ -82,19 +82,6 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { } if repo != nil && len(updates) > 0 { - if err := repo_service.PushUpdates(updates); err != nil { - log.Error("Failed to Update: %s/%s Total Updates: %d", ownerName, repoName, len(updates)) - for i, update := range updates { - log.Error("Failed to Update: %s/%s Update: %d/%d: Branch: %s", ownerName, repoName, i, len(updates), update.RefFullName.BranchName()) - } - log.Error("Failed to Update: %s/%s Error: %v", ownerName, repoName, err) - - ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ - Err: fmt.Sprintf("Failed to Update: %s/%s Error: %v", ownerName, repoName, err), - }) - return - } - branchesToSync := make([]*repo_module.PushUpdateOptions, 0, len(updates)) for _, update := range updates { if !update.RefFullName.IsBranch() { @@ -149,6 +136,19 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { return } } + + if err := repo_service.PushUpdates(updates); err != nil { + log.Error("Failed to Update: %s/%s Total Updates: %d", ownerName, repoName, len(updates)) + for i, update := range updates { + log.Error("Failed to Update: %s/%s Update: %d/%d: Branch: %s", ownerName, repoName, i, len(updates), update.RefFullName.BranchName()) + } + log.Error("Failed to Update: %s/%s Error: %v", ownerName, repoName, err) + + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to Update: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } } // Handle Push Options