1
0
Fork 0
mirror of https://github.com/chrislusf/seaweedfs synced 2025-07-26 21:42:48 +02:00
seaweedfs/weed/worker/tasks/balance/balance_detector.go
Chris Lu aa66852304
Admin UI add maintenance menu (#6944)
* add ui for maintenance

* valid config loading. fix workers page.

* refactor

* grpc between admin and workers

* add a long-running bidirectional grpc call between admin and worker
* use the grpc call to heartbeat
* use the grpc call to communicate
* worker can remove the http client
* admin uses http port + 10000 as its default grpc port

* one task one package

* handles connection failures gracefully with exponential backoff

* grpc with insecure tls

* grpc with optional tls

* fix detecting tls

* change time config from nano seconds to seconds

* add tasks with 3 interfaces

* compiles reducing hard coded

* remove a couple of tasks

* remove hard coded references

* reduce hard coded values

* remove hard coded values

* remove hard coded from templ

* refactor maintenance package

* fix import cycle

* simplify

* simplify

* auto register

* auto register factory

* auto register task types

* self register types

* refactor

* simplify

* remove one task

* register ui

* lazy init executor factories

* use registered task types

* DefaultWorkerConfig remove hard coded task types

* remove more hard coded

* implement get maintenance task

* dynamic task configuration

* "System Settings" should only have system level settings

* adjust menu for tasks

* ensure menu not collapsed

* render job configuration well

* use templ for ui of task configuration

* fix ordering

* fix bugs

* saving duration in seconds

* use value and unit for duration

* Delete WORKER_REFACTORING_PLAN.md

* Delete maintenance.json

* Delete custom_worker_example.go

* remove address from workers

* remove old code from ec task

* remove creating collection button

* reconnect with exponential backoff

* worker use security.toml

* start admin server with tls info from security.toml

* fix "weed admin" cli description
2025-07-06 13:57:02 -07:00

171 lines
4.9 KiB
Go

package balance
import (
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// BalanceDetector implements TaskDetector for balance tasks. It bundles the
// tunable settings read by ScanForTasks with the timestamp of the most recent
// scan that got past the rate limit.
type BalanceDetector struct {
	// enabled gates ScanForTasks; a disabled detector reports no tasks.
	enabled bool

	// threshold is the imbalance ratio above which a balance task is
	// proposed (0.1 = 10%).
	threshold float64

	// minCheckInterval is the minimum time that must elapse between two
	// scans that actually analyze the cluster.
	minCheckInterval time.Duration

	// minVolumeCount is the smallest number of volumes for which balancing
	// is considered at all.
	minVolumeCount int

	// lastCheck is the time at which the last non-rate-limited scan began.
	lastCheck time.Time
}
// Compile-time check that *BalanceDetector satisfies types.TaskDetector.
var _ types.TaskDetector = (*BalanceDetector)(nil)
// NewBalanceDetector returns a detector populated with the default settings:
// enabled, a 10% imbalance threshold, at most one scan per hour, and a
// minimum of 10 volumes before balancing is considered.
func NewBalanceDetector() *BalanceDetector {
	detector := new(BalanceDetector)

	detector.enabled = true
	detector.threshold = 0.1 // 10% imbalance threshold
	detector.minCheckInterval = time.Hour
	detector.minVolumeCount = 10 // Don't balance small clusters

	// lastCheck keeps its zero value, so the very first scan is never
	// rate-limited.
	return detector
}
// GetTaskType reports the kind of task this detector produces, which is
// always types.TaskTypeBalance.
func (d *BalanceDetector) GetTaskType() types.TaskType {
	return types.TaskTypeBalance
}
// ScanForTasks inspects how volumes are spread across servers and proposes a
// single balance task when the spread exceeds the configured threshold.
//
// The scan yields no task (nil, nil) when the detector is disabled, when the
// previous scan started less than minCheckInterval ago, when fewer than
// minVolumeCount volume metrics are supplied, or when the metrics reference
// fewer than two distinct servers. Servers are discovered only through the
// Server field of volumeMetrics; clusterInfo is not consulted.
//
// The imbalance ratio is (max - min) / avg of the per-server volume counts.
// When several servers share the highest or lowest count, the
// lexicographically smallest server name is reported, so the result does not
// depend on map iteration order.
//
// The returned error is always nil.
func (d *BalanceDetector) ScanForTasks(volumeMetrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterInfo) ([]*types.TaskDetectionResult, error) {
	if !d.enabled {
		return nil, nil
	}

	glog.V(2).Infof("Scanning for balance tasks...")

	// Don't check too frequently
	if time.Since(d.lastCheck) < d.minCheckInterval {
		return nil, nil
	}
	// Recorded before the size checks below, so a scan that bails out early
	// still counts against the rate limit.
	d.lastCheck = time.Now()

	// Skip if cluster is too small
	if len(volumeMetrics) < d.minVolumeCount {
		glog.V(2).Infof("Cluster too small for balance (%d volumes < %d minimum)", len(volumeMetrics), d.minVolumeCount)
		return nil, nil
	}

	// Analyze volume distribution across servers
	serverVolumeCounts := make(map[string]int)
	for _, metric := range volumeMetrics {
		serverVolumeCounts[metric.Server]++
	}

	if len(serverVolumeCounts) < 2 {
		glog.V(2).Infof("Not enough servers for balance (%d servers)", len(serverVolumeCounts))
		return nil, nil
	}

	// Calculate balance metrics
	totalVolumes := len(volumeMetrics)
	avgVolumesPerServer := float64(totalVolumes) / float64(len(serverVolumeCounts))

	maxVolumes := 0
	minVolumes := totalVolumes
	maxServer := ""
	minServer := ""

	// Map iteration order is unspecified, so ties on the count are broken by
	// server name; otherwise identical input could name different servers in
	// the task from one run to the next.
	for server, count := range serverVolumeCounts {
		if count > maxVolumes || (count == maxVolumes && server < maxServer) {
			maxVolumes = count
			maxServer = server
		}
		if count < minVolumes || (count == minVolumes && server < minServer) {
			minVolumes = count
			minServer = server
		}
	}

	// Check if imbalance exceeds threshold
	imbalanceRatio := float64(maxVolumes-minVolumes) / avgVolumesPerServer
	if imbalanceRatio <= d.threshold {
		glog.V(2).Infof("Cluster is balanced (imbalance ratio: %.2f <= %.2f)", imbalanceRatio, d.threshold)
		return nil, nil
	}

	// Create balance task
	reason := fmt.Sprintf("Cluster imbalance detected: %.1f%% (max: %d on %s, min: %d on %s, avg: %.1f)",
		imbalanceRatio*100, maxVolumes, maxServer, minVolumes, minServer, avgVolumesPerServer)

	task := &types.TaskDetectionResult{
		TaskType:   types.TaskTypeBalance,
		Priority:   types.TaskPriorityNormal,
		Reason:     reason,
		ScheduleAt: time.Now(),
		Parameters: map[string]interface{}{
			"imbalance_ratio":        imbalanceRatio,
			"threshold":              d.threshold,
			"max_volumes":            maxVolumes,
			"min_volumes":            minVolumes,
			"avg_volumes_per_server": avgVolumesPerServer,
			"max_server":             maxServer,
			"min_server":             minServer,
			"total_servers":          len(serverVolumeCounts),
		},
	}

	glog.V(1).Infof("🔄 Found balance task: %s", reason)
	return []*types.TaskDetectionResult{task}, nil
}
// ScanInterval reports how often the detector should be scanned; it is the
// same value as the configured minimum check interval.
func (d *BalanceDetector) ScanInterval() time.Duration {
	return d.minCheckInterval
}
// IsEnabled reports whether the detector is currently switched on.
func (d *BalanceDetector) IsEnabled() bool {
	return d.enabled
}
// SetEnabled switches the detector on or off and logs the new state at
// verbosity level 1.
func (d *BalanceDetector) SetEnabled(enabled bool) {
	d.enabled = enabled

	glog.V(1).Infof("🔄 Balance detector enabled: %v", enabled)
}
// SetThreshold stores a new imbalance threshold, expressed as a ratio
// (0.1 = 10%), and logs it as a percentage at verbosity level 1.
func (d *BalanceDetector) SetThreshold(threshold float64) {
	d.threshold = threshold

	percent := threshold * 100
	glog.V(1).Infof("🔄 Balance threshold set to: %.1f%%", percent)
}
// SetMinCheckInterval stores the minimum time that must pass between two
// balance checks and logs the new value at verbosity level 1.
func (d *BalanceDetector) SetMinCheckInterval(interval time.Duration) {
	d.minCheckInterval = interval

	glog.V(1).Infof("🔄 Balance check interval set to: %v", interval)
}
// SetMinVolumeCount stores the minimum number of volumes required before a
// balance operation is considered and logs the new value at verbosity
// level 1.
func (d *BalanceDetector) SetMinVolumeCount(count int) {
	d.minVolumeCount = count

	glog.V(1).Infof("🔄 Balance minimum volume count set to: %d", count)
}
// GetThreshold reports the imbalance threshold currently in effect, as a
// ratio (0.1 = 10%).
func (d *BalanceDetector) GetThreshold() float64 {
	return d.threshold
}
// GetMinCheckInterval reports the minimum time enforced between two balance
// checks.
func (d *BalanceDetector) GetMinCheckInterval() time.Duration {
	return d.minCheckInterval
}
// GetMinVolumeCount reports the minimum number of volumes required before a
// balance operation is considered.
func (d *BalanceDetector) GetMinVolumeCount() int {
	return d.minVolumeCount
}