mirror of
https://codeberg.org/forgejo/forgejo
synced 2025-10-19 06:20:45 +02:00
Fixes #9644. Rewrites `db.Iterate` so that it performs DB queries in this format: - First: `SELECT ...columns... FROM table ORDER BY id LIMIT ...buffer-size...` - Subsequent buffer fills: adding a `WHERE id > ...last-id-from-previous...` This approach: - Prevents records from being missed or returned twice - Returns records in a predictable order - Should be faster, by virtue of using database indexes on the primary key to perform the query - Doesn't rely on any unpredictable database behaviour when using `LIMIT` and `OFFSET` without an `ORDER BY` - (Downside: does require reflection to read field values off Go structures for the primary key value) Expands the automated tests to include the predicted failure case identified in #9644, which verified the previous broken behaviour, as well as verifying that the `cond` parameter is applied which was previously not covered by test automation. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/9657 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net> Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
86 lines
1.9 KiB
Go
86 lines
1.9 KiB
Go
// Copyright 2022 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package db
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"reflect"
|
|
|
|
"forgejo.org/modules/setting"
|
|
|
|
"xorm.io/builder"
|
|
)
|
|
|
|
// Iterate iterate all the Bean object. The table being iterated must have a single-column primary key.
|
|
func Iterate[Bean any](ctx context.Context, cond builder.Cond, f func(ctx context.Context, bean *Bean) error) error {
|
|
var dummy Bean
|
|
batchSize := setting.Database.IterateBufferSize
|
|
|
|
table, err := TableInfo(&dummy)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to fetch table info for bean %v: %w", dummy, err)
|
|
}
|
|
if len(table.PrimaryKeys) != 1 {
|
|
return fmt.Errorf("iterate only supported on a table with 1 primary key field, but table %s had %d", table.Name, len(table.PrimaryKeys))
|
|
}
|
|
|
|
pkDbName := table.PrimaryKeys[0]
|
|
var pkStructFieldName string
|
|
|
|
for _, c := range table.Columns() {
|
|
if c.Name == pkDbName {
|
|
pkStructFieldName = c.FieldName
|
|
break
|
|
}
|
|
}
|
|
if pkStructFieldName == "" {
|
|
return fmt.Errorf("iterate unable to identify struct field for primary key %s", pkDbName)
|
|
}
|
|
|
|
var lastPK any
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
beans := make([]*Bean, 0, batchSize)
|
|
|
|
sess := GetEngine(ctx)
|
|
sess = sess.OrderBy(pkDbName)
|
|
if cond != nil {
|
|
sess = sess.Where(cond)
|
|
}
|
|
if lastPK != nil {
|
|
sess = sess.Where(builder.Gt{pkDbName: lastPK})
|
|
}
|
|
|
|
if err := sess.Limit(batchSize).Find(&beans); err != nil {
|
|
return err
|
|
}
|
|
if len(beans) == 0 {
|
|
return nil
|
|
}
|
|
|
|
for _, bean := range beans {
|
|
if err := f(ctx, bean); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
lastBean := beans[len(beans)-1]
|
|
lastPK = extractFieldValue(lastBean, pkStructFieldName)
|
|
}
|
|
}
|
|
}
|
|
|
|
// extractFieldValue returns the value of the struct field named fieldName on
// bean, which may be a struct or a pointer to one.
//
// fieldName may be a dot-separated path such as "Base.ID". Each segment is
// looked up in turn, and pointers met before a lookup are dereferenced.
// NOTE(review): xorm appears to report columns inherited through an `extends`
// struct with such a dotted FieldName — confirm against the xorm version in
// use. A single reflect.Value.FieldByName call cannot resolve that form.
//
// It panics with a descriptive message when the path cannot be resolved: a
// nil pointer on the way, a non-struct value where a struct is required, or a
// segment that names no field.
func extractFieldValue(bean any, fieldName string) any {
	v := reflect.ValueOf(bean)

	segStart := 0
	for i := 0; i <= len(fieldName); i++ {
		// Keep scanning until a separator or the end of the path is reached.
		if i < len(fieldName) && fieldName[i] != '.' {
			continue
		}
		segment := fieldName[segStart:i]
		segStart = i + 1

		for v.Kind() == reflect.Ptr {
			if v.IsNil() {
				panic(fmt.Sprintf("extractFieldValue: nil pointer while resolving field %q on %T", fieldName, bean))
			}
			v = v.Elem()
		}
		if v.Kind() != reflect.Struct {
			panic(fmt.Sprintf("extractFieldValue: cannot resolve field %q on %T: reached non-struct kind %s", fieldName, bean, v.Kind()))
		}

		field := v.FieldByName(segment)
		if !field.IsValid() {
			panic(fmt.Sprintf("extractFieldValue: %T has no field %q (resolving %q)", bean, segment, fieldName))
		}
		v = field
	}

	return v.Interface()
}
|