forgejo/models/db/iterate.go
Mathieu Fenniak 79f6f8e508 fix: db.Iterate can miss records, can return records twice (#9657)
Fixes #9644.

Rewrites `db.Iterate` so that it performs DB queries in this format:
- First: `SELECT ...columns... FROM table ORDER BY id LIMIT ...buffer-size...`
- Subsequent buffer fills: adding a `WHERE id > ...last-id-from-previous...`

This approach:
- Prevents records from being missed or returned twice
- Returns records in a predictable order
- Should be faster, by virtue of using database indexes on the primary key to perform the query
- Doesn't rely on any unpredictable database behaviour when using `LIMIT` and `OFFSET` without an `ORDER BY`
- (Downside: does require reflection to read field values off Go structures for the primary key value)

Expands the automated tests to cover the failure case predicted in #9644 (which confirmed the previous broken behaviour), and to verify that the `cond` parameter is applied, which was previously not covered by test automation.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/9657
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-10-12 21:47:26 +02:00

86 lines
1.9 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package db
import (
"context"
"fmt"
"reflect"
"forgejo.org/modules/setting"
"xorm.io/builder"
)
// Iterate calls f once for every row of Bean's table that matches cond, in
// ascending primary key order. The table being iterated must have a
// single-column primary key.
//
// Rows are loaded in batches of setting.Database.IterateBufferSize. Every
// batch after the first is restricted to primary keys greater than the last
// one already handed to f, instead of paging by offset.
//
// cond may be nil, in which case no extra filter is applied. Iterate returns
// nil once a batch comes back empty, ctx.Err() if ctx is done when a new batch
// is about to be loaded, or the first error returned by the query or by f.
func Iterate[Bean any](ctx context.Context, cond builder.Cond, f func(ctx context.Context, bean *Bean) error) error {
	var dummy Bean
	batchSize := setting.Database.IterateBufferSize

	table, err := TableInfo(&dummy)
	if err != nil {
		// %T names the bean type; the zero-valued dummy itself would only
		// print a struct full of zero fields.
		return fmt.Errorf("unable to fetch table info for bean %T: %w", dummy, err)
	}
	if len(table.PrimaryKeys) != 1 {
		return fmt.Errorf("iterate only supported on a table with 1 primary key field, but table %s had %d", table.Name, len(table.PrimaryKeys))
	}

	// Map the primary key's database column name to the Go struct field that
	// holds it, so the value can be read back off each loaded bean.
	pkDbName := table.PrimaryKeys[0]
	var pkStructFieldName string
	for _, c := range table.Columns() {
		if c.Name == pkDbName {
			pkStructFieldName = c.FieldName
			break
		}
	}
	if pkStructFieldName == "" {
		return fmt.Errorf("iterate unable to identify struct field for primary key %s", pkDbName)
	}

	// lastPK stays nil until the first batch has been processed; after that it
	// is the lower (exclusive) bound for the next batch.
	var lastPK any
	for {
		// Cancellation is only checked between batches, not between rows.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		beans := make([]*Bean, 0, batchSize)
		sess := GetEngine(ctx)
		sess = sess.OrderBy(pkDbName)
		if cond != nil {
			sess = sess.Where(cond)
		}
		if lastPK != nil {
			sess = sess.Where(builder.Gt{pkDbName: lastPK})
		}
		if err := sess.Limit(batchSize).Find(&beans); err != nil {
			return err
		}
		if len(beans) == 0 {
			return nil
		}

		for _, bean := range beans {
			if err := f(ctx, bean); err != nil {
				return err
			}
		}

		// The batch is ordered by primary key, so its last bean carries the
		// highest key seen so far.
		lastBean := beans[len(beans)-1]
		lastPK = extractFieldValue(lastBean, pkStructFieldName)
	}
}
// extractFieldValue returns the value of the struct field identified by
// fieldName on bean, which may be a struct or a pointer to one.
//
// fieldName is either a plain field name ("ID") or a dot-separated path
// ("Embedded.ID"). Each path segment is looked up on the struct reached by the
// previous segment, and pointers met along the way are followed.
// NOTE(review): the dotted form is believed to be what xorm reports as
// Column.FieldName for columns coming from an `extends` struct — confirm.
//
// It panics with a descriptive message when the path cannot be resolved (nil
// pointer, non-struct value, or unknown field). A panic is used rather than a
// nil result because callers treat the returned value as real data.
func extractFieldValue(bean any, fieldName string) any {
	v := reflect.ValueOf(bean)

	// Walk fieldName one '.'-separated segment at a time; i == len(fieldName)
	// closes the final segment.
	start := 0
	for i := 0; i <= len(fieldName); i++ {
		if i < len(fieldName) && fieldName[i] != '.' {
			continue
		}
		segment := fieldName[start:i]
		start = i + 1

		for v.Kind() == reflect.Ptr {
			if v.IsNil() {
				panic(fmt.Sprintf("extractFieldValue: nil pointer while resolving field %q (at %q) on %T", fieldName, segment, bean))
			}
			v = v.Elem()
		}
		if v.Kind() != reflect.Struct {
			panic(fmt.Sprintf("extractFieldValue: cannot resolve field %q (at %q) on %T: not a struct", fieldName, segment, bean))
		}

		v = v.FieldByName(segment)
		if !v.IsValid() {
			panic(fmt.Sprintf("extractFieldValue: no field %q (at %q) on %T", fieldName, segment, bean))
		}
	}
	return v.Interface()
}