From 33f9fb815097f2c179f31892a940e5e0231bb435 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 4 Jul 2024 20:57:11 +0200 Subject: [PATCH] Fix slow patch checking with commits that add or remove many files (#31548) Running git update-index for every individual file is slow, so add and remove everything with a single git command. When such a big commit lands in the default branch, it could cause PR creation and patch checking for all open PRs to be slow, or time out entirely. For example, a commit that removes 1383 files was measured to take more than 60 seconds and timed out. With this change checking took about a second. This is related to #27967, though this will not help with commits that change many lines in few files. (cherry picked from commit b88e5fc72d99e9d4a0aa9c13f70e0a9e967fe057) --- modules/git/repo_index.go | 35 +++++++++++++++++++++++++++-------- services/pull/patch.go | 29 +++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go index 6aaab242c1..8390570098 100644 --- a/modules/git/repo_index.go +++ b/modules/git/repo_index.go @@ -104,11 +104,8 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error { buffer := new(bytes.Buffer) for _, file := range filenames { if file != "" { - buffer.WriteString("0 ") - buffer.WriteString(objectFormat.EmptyObjectID().String()) - buffer.WriteByte('\t') - buffer.WriteString(file) - buffer.WriteByte('\000') + // using format: mode SP type SP sha1 TAB path + buffer.WriteString("0 blob " + objectFormat.EmptyObjectID().String() + "\t" + file + "\000") } } return cmd.Run(&RunOpts{ @@ -119,11 +116,33 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error { }) } +type IndexObjectInfo struct { + Mode string + Object ObjectID + Filename string +} + +// AddObjectsToIndex adds the provided object hashes to the index at the provided filenames +func (repo *Repository) AddObjectsToIndex(objects ...IndexObjectInfo) error { + cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "-z", "--index-info") + stdout := new(bytes.Buffer) + stderr := new(bytes.Buffer) + buffer := new(bytes.Buffer) + for _, object := range objects { + // using format: mode SP type SP sha1 TAB path + buffer.WriteString(object.Mode + " blob " + object.Object.String() + "\t" + object.Filename + "\000") + } + return cmd.Run(&RunOpts{ + Dir: repo.Path, + Stdin: bytes.NewReader(buffer.Bytes()), + Stdout: stdout, + Stderr: stderr, + }) +} + // AddObjectToIndex adds the provided object hash to the index at the provided filename func (repo *Repository) AddObjectToIndex(mode string, object ObjectID, filename string) error { - cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "--cacheinfo").AddDynamicArguments(mode, object.String(), filename) - _, _, err := cmd.RunStdString(&RunOpts{Dir: repo.Path}) - return err + return repo.AddObjectsToIndex(IndexObjectInfo{Mode: mode, Object: object, Filename: filename}) } // WriteTree writes the current index as a tree to the object db and returns its hash diff --git a/services/pull/patch.go b/services/pull/patch.go index 12b79a0625..e90b4bdbbe 100644 --- a/services/pull/patch.go +++ b/services/pull/patch.go @@ -128,7 +128,7 @@ func (e *errMergeConflict) Error() string { return fmt.Sprintf("conflict detected at: %s", e.filename) } -func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error { +func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, filesToRemove *[]string, filesToAdd *[]git.IndexObjectInfo) error { log.Trace("Attempt to merge:\n%v", file) switch { @@ -142,14 +142,13 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g } // Not a genuine conflict and we can simply remove the file from the index - return gitRepo.RemoveFilesFromIndex(file.stage1.path) + *filesToRemove = append(*filesToRemove, file.stage1.path) + return nil case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)): // 2. Added in ours but not in theirs or identical in both // // Not a genuine conflict just add to the index - if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil { - return err - } + *filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(file.stage2.sha), Filename: file.stage2.path}) return nil case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode: // 3. Added in both with the same sha but the modes are different @@ -160,7 +159,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g // 4. Added in theirs but not ours: // // Not a genuine conflict just add to the index - return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path) + *filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage3.mode, Object: git.MustIDFromString(file.stage3.sha), Filename: file.stage3.path}) + return nil case file.stage1 == nil: // 5. Created by new in both // @@ -221,7 +221,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g return err } hash = strings.TrimSpace(hash) - return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path) + *filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(hash), Filename: file.stage2.path}) + return nil default: if file.stage1 != nil { return &errMergeConflict{file.stage1.path} @@ -245,6 +246,9 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo return false, nil, fmt.Errorf("unable to run read-tree -m! Error: %w", err) } + var filesToRemove []string + var filesToAdd []git.IndexObjectInfo + // Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles unmerged := make(chan *unmergedFile) go unmergedFiles(ctx, gitPath, unmerged) @@ -270,7 +274,7 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo } // OK now we have the unmerged file triplet attempt to merge it - if err := attemptMerge(ctx, file, gitPath, gitRepo); err != nil { + if err := attemptMerge(ctx, file, gitPath, &filesToRemove, &filesToAdd); err != nil { if conflictErr, ok := err.(*errMergeConflict); ok { log.Trace("Conflict: %s in %s", conflictErr.filename, description) conflict = true @@ -283,6 +287,15 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo return false, nil, err } } + + // Add and remove files in one command, as this is slow with many files otherwise + if err := gitRepo.RemoveFilesFromIndex(filesToRemove...); err != nil { + return false, nil, err + } + if err := gitRepo.AddObjectsToIndex(filesToAdd...); err != nil { + return false, nil, err + } + return conflict, conflictedFiles, nil }