forgejo/models/db/iterate_test.go
Mathieu Fenniak 79f6f8e508 fix: db.Iterate can miss records, can return records twice (#9657)
Fixes #9644.

Rewrites `db.Iterate` so that it performs DB queries in this format:
- First: `SELECT ...columns... FROM table ORDER BY id LIMIT ...buffer-size...`
- Subsequent buffer fills: adding a `WHERE id > ...last-id-from-previous...`

This approach:
- Prevents records from being missed or returned twice
- Returns records in a predictable order
- Should be faster, by virtue of using database indexes on the primary key to perform the query
- Doesn't rely on any unpredictable database behaviour when using `LIMIT` and `OFFSET` without an `ORDER BY`
- (Downside: does require reflection to read field values off Go structures for the primary key value)

Expands the automated tests to include the predicted failure case identified in #9644, which verified the previous broken behaviour, as well as verifying that the `cond` parameter is applied which was previously not covered by test automation.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/9657
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-10-12 21:47:26 +02:00

117 lines
4 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package db_test
import (
"context"
"fmt"
"slices"
"testing"
"forgejo.org/models/db"
repo_model "forgejo.org/models/repo"
"forgejo.org/models/unittest"
"forgejo.org/modules/setting"
"forgejo.org/modules/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"xorm.io/builder"
)
func TestIterate(t *testing.T) {
db.SetLogSQL(t.Context(), true)
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 50)()
t.Run("No Modifications", func(t *testing.T) {
require.NoError(t, unittest.PrepareTestDatabase())
xe, err := unittest.GetXORMEngine()
require.NoError(t, err)
require.NoError(t, xe.Sync(&repo_model.RepoUnit{}))
// Fetch all the repo unit IDs...
var remainingRepoIDs []int64
db.GetEngine(t.Context()).Table(&repo_model.RepoUnit{}).Cols("id").Find(&remainingRepoIDs)
// Ensure that every repo unit ID is found when doing iterate:
err = db.Iterate(t.Context(), nil, func(ctx context.Context, repo *repo_model.RepoUnit) error {
remainingRepoIDs = slices.DeleteFunc(remainingRepoIDs, func(n int64) bool {
return repo.ID == n
})
return nil
})
require.NoError(t, err)
assert.Empty(t, remainingRepoIDs)
})
t.Run("Concurrent Delete", func(t *testing.T) {
require.NoError(t, unittest.PrepareTestDatabase())
xe, err := unittest.GetXORMEngine()
require.NoError(t, err)
require.NoError(t, xe.Sync(&repo_model.RepoUnit{}))
// Fetch all the repo unit IDs...
var remainingRepoIDs []int64
db.GetEngine(t.Context()).Table(&repo_model.RepoUnit{}).Cols("id").Find(&remainingRepoIDs)
// Ensure that every repo unit ID is found, even if someone else performs a DELETE on the table while we're
// iterating. In real-world usage the deleted record may or may not be returned, but the important
// subject-under-test is that no *other* record is skipped.
didDelete := false
err = db.Iterate(t.Context(), nil, func(ctx context.Context, repo *repo_model.RepoUnit) error {
// While on page 2 (assuming ID ordering, 50 record buffer size)...
if repo.ID == 51 {
// Delete a record that would have been on page 1.
affected, err := db.GetEngine(t.Context()).ID(25).Delete(&repo_model.RepoUnit{})
if err != nil {
return err
} else if affected != 1 {
return fmt.Errorf("expected to delete 1 record, but affected %d records", affected)
}
didDelete = true
}
remainingRepoIDs = slices.DeleteFunc(remainingRepoIDs, func(n int64) bool {
return repo.ID == n
})
return nil
})
require.NoError(t, err)
assert.True(t, didDelete, "didDelete")
assert.Empty(t, remainingRepoIDs)
})
t.Run("Verify cond applied", func(t *testing.T) {
require.NoError(t, unittest.PrepareTestDatabase())
xe, err := unittest.GetXORMEngine()
require.NoError(t, err)
require.NoError(t, xe.Sync(&repo_model.RepoUnit{}))
// Fetch all the repo unit IDs...
var remainingRepoIDs []int64
db.GetEngine(t.Context()).Table(&repo_model.RepoUnit{}).Cols("id").Find(&remainingRepoIDs)
// Remove those that we're not expecting to find based upon `Iterate`'s condition. We'll trim the front few
// records and last few records, which will confirm that cond is applied on all pages.
remainingRepoIDs = slices.DeleteFunc(remainingRepoIDs, func(n int64) bool {
return n <= 15 || n > 1000
})
err = db.Iterate(t.Context(), builder.Gt{"id": 15}.And(builder.Lt{"id": 1000}), func(ctx context.Context, repo *repo_model.RepoUnit) error {
removedRecord := false
// Remove the record from remainingRepoIDs, but track to make sure we did actually remove a record
remainingRepoIDs = slices.DeleteFunc(remainingRepoIDs, func(n int64) bool {
if repo.ID == n {
removedRecord = true
return true
}
return false
})
if !removedRecord {
return fmt.Errorf("unable to find record in remainingRepoIDs for repo %d, indicating a cond application failure", repo.ID)
}
return nil
})
require.NoError(t, err)
assert.Empty(t, remainingRepoIDs)
})
}