Admin UI add maintenance menu (#6944)

* add ui for maintenance

* valid config loading. fix workers page.

* refactor

* grpc between admin and workers

* add a long-running bidirectional grpc call between admin and worker
* use the grpc call to heartbeat
* use the grpc call to communicate
* worker can remove the http client
* admin uses http port + 10000 as its default grpc port

* one task one package

* handles connection failures gracefully with exponential backoff

* grpc with insecure tls

* grpc with optional tls

* fix detecting tls

* change time config from nano seconds to seconds

* add tasks with 3 interfaces

* compiles reducing hard coded

* remove a couple of tasks

* remove hard coded references

* reduce hard coded values

* remove hard coded values

* remove hard coded from templ

* refactor maintenance package

* fix import cycle

* simplify

* simplify

* auto register

* auto register factory

* auto register task types

* self register types

* refactor

* simplify

* remove one task

* register ui

* lazy init executor factories

* use registered task types

* DefaultWorkerConfig remove hard coded task types

* remove more hard coded

* implement get maintenance task

* dynamic task configuration

* "System Settings" should only have system level settings

* adjust menu for tasks

* ensure menu not collapsed

* render job configuration well

* use templ for ui of task configuration

* fix ordering

* fix bugs

* saving duration in seconds

* use value and unit for duration

* Delete WORKER_REFACTORING_PLAN.md

* Delete maintenance.json

* Delete custom_worker_example.go

* remove address from workers

* remove old code from ec task

* remove creating collection button

* reconnect with exponential backoff

* worker use security.toml

* start admin server with tls info from security.toml

* fix "weed admin" cli description
This commit is contained in:
Chris Lu
2025-07-06 13:57:02 -07:00
committed by GitHub
parent 302e62d480
commit aa66852304
76 changed files with 18218 additions and 206 deletions

761
weed/worker/client.go Normal file
View File

@@ -0,0 +1,761 @@
package worker
import (
"context"
"fmt"
"io"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
"google.golang.org/grpc"
)
// GrpcAdminClient is the AdminClient implementation that talks to the admin
// server over one long-lived bidirectional gRPC stream.
type GrpcAdminClient struct {
	// Target and identity.
	adminAddress string // gRPC address as computed by NewGrpcAdminClient
	workerID     string
	dialOption   grpc.DialOption

	// Live connection handles; Connect and reconnect replace them as a set.
	conn         *grpc.ClientConn
	client       worker_pb.WorkerServiceClient
	stream       worker_pb.WorkerService_WorkerStreamClient
	streamCtx    context.Context
	streamCancel context.CancelFunc

	// Connection state flags. The methods of this type take mutex around
	// most accesses to the handles above, these flags, and the settings below.
	connected       bool
	reconnecting    bool
	shouldReconnect bool
	mutex           sync.RWMutex

	// Reconnection tuning, adjustable through SetReconnectionSettings.
	maxReconnectAttempts int // 0 means retry forever
	reconnectBackoff     time.Duration
	maxReconnectBackoff  time.Duration
	reconnectMultiplier  float64

	// Last worker passed to RegisterWorker; replayed after a reconnect.
	lastWorkerInfo *types.Worker

	// Message queues between callers and the stream handler goroutines.
	outgoing chan *worker_pb.WorkerMessage
	incoming chan *worker_pb.AdminMessage
	// NOTE(review): responseChans and responsesMutex are initialised by the
	// constructor but not used by any method visible here — confirm whether
	// they are still needed.
	responseChans  map[string]chan *worker_pb.AdminMessage
	responsesMutex sync.RWMutex

	// shutdownChan tells the reconnection loop to exit.
	shutdownChan chan struct{}
}
// NewGrpcAdminClient builds a client that is not yet connected. adminAddress
// is converted with pb.ServerToGrpcAddress before it is stored, and the
// reconnection defaults are: unlimited attempts, 1s initial backoff, 30s
// maximum backoff, and a 1.5x multiplier.
func NewGrpcAdminClient(adminAddress string, workerID string, dialOption grpc.DialOption) *GrpcAdminClient {
	client := &GrpcAdminClient{
		workerID:        workerID,
		dialOption:      dialOption,
		shouldReconnect: true,

		// A zero attempt limit means "never give up".
		maxReconnectAttempts: 0,
		reconnectBackoff:     1 * time.Second,
		maxReconnectBackoff:  30 * time.Second,
		reconnectMultiplier:  1.5,

		outgoing:      make(chan *worker_pb.WorkerMessage, 100),
		incoming:      make(chan *worker_pb.AdminMessage, 100),
		responseChans: make(map[string]chan *worker_pb.AdminMessage),
		shutdownChan:  make(chan struct{}),
	}
	// The admin server exposes gRPC on its HTTP port + 10000.
	client.adminAddress = pb.ServerToGrpcAddress(adminAddress)
	return client
}
// Connect dials the admin server with the configured dial option, opens the
// bidirectional worker stream, and starts the outgoing/incoming handlers and
// the reconnection loop.
//
// It returns an error if the client is already connected, if dialing fails,
// or if the stream cannot be created. On failure no client state is changed
// and every resource created along the way is released.
func (c *GrpcAdminClient) Connect() error {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	if c.connected {
		return fmt.Errorf("already connected")
	}

	conn, err := c.createConnection()
	if err != nil {
		return fmt.Errorf("failed to connect to admin server: %w", err)
	}
	client := worker_pb.NewWorkerServiceClient(conn)

	// Build the stream on locals first so that a failure cannot leave the
	// client holding a closed connection or an uncancelled context.
	streamCtx, streamCancel := context.WithCancel(context.Background())
	stream, err := client.WorkerStream(streamCtx)
	if err != nil {
		streamCancel()
		conn.Close()
		return fmt.Errorf("failed to create worker stream: %w", err)
	}

	c.conn = conn
	c.client = client
	c.stream = stream
	c.streamCtx = streamCtx
	c.streamCancel = streamCancel
	c.connected = true

	// Start stream handlers and the reconnection loop.
	go c.handleOutgoing()
	go c.handleIncoming()
	go c.reconnectionLoop()

	glog.Infof("Connected to admin server at %s", c.adminAddress)
	return nil
}
// createConnection dials c.adminAddress through pb.GrpcDial using the stored
// dial option and a context that expires after five seconds.
func (c *GrpcAdminClient) createConnection() (*grpc.ClientConn, error) {
	dialCtx, cancelDial := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelDial()

	conn, dialErr := pb.GrpcDial(dialCtx, c.adminAddress, false, c.dialOption)
	if dialErr == nil {
		glog.Infof("Connected to admin server at %s", c.adminAddress)
		return conn, nil
	}
	return nil, fmt.Errorf("failed to connect to admin server: %v", dialErr)
}
// Disconnect shuts the client down. It always disables reconnection and
// signals the reconnection loop to exit, even when the link is currently
// down. If the client is connected it also queues a shutdown message,
// cancels the stream, closes the connection, and closes the message channels.
//
// Disconnect is safe to call more than once. The client is not meant to be
// reused after it returns. It always returns nil.
func (c *GrpcAdminClient) Disconnect() error {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	// Stop reconnecting regardless of the current link state; otherwise a
	// Disconnect issued during an outage would leave the loop retrying.
	c.shouldReconnect = false

	// Closing (rather than sending on) the channel guarantees the loop sees
	// the signal even if it is not receiving at this exact moment. The
	// receive-with-default guard makes the close idempotent.
	if c.shutdownChan != nil {
		select {
		case <-c.shutdownChan:
			// Already closed by an earlier Disconnect.
		default:
			close(c.shutdownChan)
		}
	}

	if !c.connected {
		return nil
	}
	c.connected = false

	// Queue the shutdown message.
	// NOTE(review): connected is already false and the lock is held, so the
	// outgoing handler may discard this message instead of transmitting it —
	// confirm whether the admin server relies on receiving it.
	shutdownMsg := &worker_pb.WorkerMessage{
		WorkerId:  c.workerID,
		Timestamp: time.Now().Unix(),
		Message: &worker_pb.WorkerMessage_Shutdown{
			Shutdown: &worker_pb.WorkerShutdown{
				WorkerId: c.workerID,
				Reason:   "normal shutdown",
			},
		},
	}
	select {
	case c.outgoing <- shutdownMsg:
	case <-time.After(time.Second):
		glog.Warningf("Failed to send shutdown message")
	}

	// Cancel the stream context, then close the stream and the connection.
	if c.streamCancel != nil {
		c.streamCancel()
	}
	if c.stream != nil {
		c.stream.CloseSend()
	}
	if c.conn != nil {
		c.conn.Close()
	}

	// Closing outgoing lets the outgoing handler's receive loop terminate.
	close(c.outgoing)
	close(c.incoming)

	glog.Infof("Disconnected from admin server")
	return nil
}
// reconnectionLoop runs until shutdown is signalled or the attempt limit is
// reached. Once per second it checks whether the link is down and
// reconnection is enabled; if so it calls reconnect, backing off
// exponentially (capped at the configured maximum) between failed attempts.
func (c *GrpcAdminClient) reconnectionLoop() {
	c.mutex.RLock()
	backoff := c.reconnectBackoff
	c.mutex.RUnlock()
	attempts := 0

	for {
		select {
		case <-c.shutdownChan:
			return
		default:
		}

		// Decide and claim the attempt under one lock so that two loops can
		// never both start reconnecting, and snapshot the settings so they
		// are not read concurrently with SetReconnectionSettings.
		c.mutex.Lock()
		due := c.shouldReconnect && !c.connected && !c.reconnecting
		if due {
			c.reconnecting = true
		}
		maxAttempts := c.maxReconnectAttempts
		initialBackoff := c.reconnectBackoff
		maxBackoff := c.maxReconnectBackoff
		multiplier := c.reconnectMultiplier
		c.mutex.Unlock()

		if !due {
			// Idle wait that still reacts to shutdown immediately.
			select {
			case <-c.shutdownChan:
				return
			case <-time.After(time.Second):
			}
			continue
		}

		glog.Infof("Attempting to reconnect to admin server (attempt %d)", attempts+1)
		err := c.reconnect()

		c.mutex.Lock()
		c.reconnecting = false
		c.mutex.Unlock()

		if err == nil {
			// Successful reconnection: start over with fresh counters.
			attempts = 0
			backoff = initialBackoff
			glog.Infof("Successfully reconnected to admin server")
			continue
		}

		attempts++
		glog.Errorf("Reconnection attempt %d failed: %v", attempts, err)

		// Give up once a positive attempt limit has been reached.
		if maxAttempts > 0 && attempts >= maxAttempts {
			glog.Errorf("Max reconnection attempts (%d) reached, giving up", maxAttempts)
			c.mutex.Lock()
			c.shouldReconnect = false
			c.mutex.Unlock()
			return
		}

		// Wait with exponential backoff, still honouring shutdown.
		glog.Infof("Waiting %v before next reconnection attempt", backoff)
		select {
		case <-c.shutdownChan:
			return
		case <-time.After(backoff):
		}

		backoff = time.Duration(float64(backoff) * multiplier)
		if backoff > maxBackoff {
			backoff = maxBackoff
		}
	}
}
// reconnect tears down whatever connection handles are currently stored,
// dials a fresh connection and stream, publishes them, restarts the stream
// handlers, and replays the last worker registration if there is one.
// A failed re-registration is logged but does not fail the reconnect.
//
// NOTE(review): new handler goroutines are started unconditionally here;
// whether the previous ones have already exited is not checked — confirm.
func (c *GrpcAdminClient) reconnect() error {
	// Release the previous stream and connection.
	c.mutex.Lock()
	if cancelOld := c.streamCancel; cancelOld != nil {
		cancelOld()
	}
	if oldStream := c.stream; oldStream != nil {
		oldStream.CloseSend()
	}
	if oldConn := c.conn; oldConn != nil {
		oldConn.Close()
	}
	c.mutex.Unlock()

	// Dial and open the stream on locals so nothing is published on failure.
	newConn, err := c.createConnection()
	if err != nil {
		return fmt.Errorf("failed to create connection: %v", err)
	}
	newClient := worker_pb.NewWorkerServiceClient(newConn)

	newCtx, newCancel := context.WithCancel(context.Background())
	newStream, err := newClient.WorkerStream(newCtx)
	if err != nil {
		newConn.Close()
		newCancel()
		return fmt.Errorf("failed to create stream: %v", err)
	}

	// Publish the new handles and mark the client connected.
	c.mutex.Lock()
	c.conn, c.client, c.stream = newConn, newClient, newStream
	c.streamCtx, c.streamCancel = newCtx, newCancel
	c.connected = true
	c.mutex.Unlock()

	// Restart the stream handlers.
	go c.handleOutgoing()
	go c.handleIncoming()

	// Replay the registration remembered by RegisterWorker, if any.
	c.mutex.RLock()
	previous := c.lastWorkerInfo
	c.mutex.RUnlock()
	if previous == nil {
		return nil
	}

	glog.Infof("Re-registering worker after reconnection...")
	if regErr := c.sendRegistration(previous); regErr != nil {
		// Registration failure is deliberately not treated as a reconnect failure.
		glog.Errorf("Failed to re-register worker: %v", regErr)
	}
	return nil
}
// handleOutgoing drains the outgoing queue onto the current stream. It exits
// when the queue is closed, when the client is found disconnected (the
// message just dequeued is discarded in that case), or when a send fails,
// in which case it also marks the client disconnected.
func (c *GrpcAdminClient) handleOutgoing() {
	for {
		msg, open := <-c.outgoing
		if !open {
			return
		}

		// Snapshot state under the read lock; the send happens outside it.
		c.mutex.RLock()
		live, current := c.connected, c.stream
		c.mutex.RUnlock()
		if !live {
			return
		}

		sendErr := current.Send(msg)
		if sendErr == nil {
			continue
		}

		glog.Errorf("Failed to send message to admin: %v", sendErr)
		c.mutex.Lock()
		c.connected = false
		c.mutex.Unlock()
		return
	}
}
// handleIncoming reads messages from the current stream and forwards them to
// the incoming queue. A message is dropped with a warning if the queue stays
// full for one second. The loop exits when the client is found disconnected
// or when a receive fails, in which case it marks the client disconnected.
func (c *GrpcAdminClient) handleIncoming() {
	for {
		// Snapshot state under the read lock; the receive happens outside it.
		c.mutex.RLock()
		live, current := c.connected, c.stream
		c.mutex.RUnlock()
		if !live {
			return
		}

		msg, recvErr := current.Recv()
		if recvErr != nil {
			switch recvErr {
			case io.EOF:
				glog.Infof("Admin server closed the stream")
			default:
				glog.Errorf("Failed to receive message from admin: %v", recvErr)
			}
			c.mutex.Lock()
			c.connected = false
			c.mutex.Unlock()
			return
		}

		// Hand the message to whoever is reading the incoming queue.
		select {
		case c.incoming <- msg:
		case <-time.After(time.Second):
			glog.Warningf("Incoming message buffer full, dropping message")
		}
	}
}
// RegisterWorker registers the worker with the admin server and remembers it
// so the registration can be replayed after a reconnect.
//
// It returns an error if worker is nil, if the client is not connected, or
// if sending the registration fails.
func (c *GrpcAdminClient) RegisterWorker(worker *types.Worker) error {
	if worker == nil {
		return fmt.Errorf("worker is nil")
	}

	// connected is written by the stream handlers under the mutex, so it
	// must be read under the mutex too.
	c.mutex.RLock()
	connected := c.connected
	c.mutex.RUnlock()
	if !connected {
		return fmt.Errorf("not connected to admin server")
	}

	// Store worker info for re-registration after reconnection.
	c.mutex.Lock()
	c.lastWorkerInfo = worker
	c.mutex.Unlock()

	return c.sendRegistration(worker)
}
// sendRegistration queues a registration message for the worker and waits up
// to ten seconds for a registration response on the incoming queue.
//
// Messages read from the incoming queue that are not registration responses
// are discarded. It returns an error if worker is nil, if the message cannot
// be queued within five seconds, if the incoming queue is closed, if the
// admin rejects the registration, or on timeout.
func (c *GrpcAdminClient) sendRegistration(worker *types.Worker) error {
	if worker == nil {
		return fmt.Errorf("worker is nil")
	}

	capabilities := make([]string, len(worker.Capabilities))
	for i, cap := range worker.Capabilities {
		capabilities[i] = string(cap)
	}

	msg := &worker_pb.WorkerMessage{
		WorkerId:  c.workerID,
		Timestamp: time.Now().Unix(),
		Message: &worker_pb.WorkerMessage_Registration{
			Registration: &worker_pb.WorkerRegistration{
				WorkerId:      c.workerID,
				Address:       worker.Address,
				Capabilities:  capabilities,
				MaxConcurrent: int32(worker.MaxConcurrent),
				Metadata:      make(map[string]string),
			},
		},
	}

	select {
	case c.outgoing <- msg:
	case <-time.After(5 * time.Second):
		return fmt.Errorf("failed to send registration message: timeout")
	}

	// Wait for the registration response.
	timeout := time.NewTimer(10 * time.Second)
	defer timeout.Stop()

	for {
		select {
		case response, ok := <-c.incoming:
			if !ok {
				// A closed queue yields nil forever; bail out instead of
				// spinning until the timer fires.
				return fmt.Errorf("registration failed: connection closed")
			}
			regResp := response.GetRegistrationResponse()
			if regResp == nil {
				continue
			}
			if regResp.Success {
				glog.Infof("Worker registered successfully: %s", regResp.Message)
				return nil
			}
			return fmt.Errorf("registration failed: %s", regResp.Message)
		case <-timeout.C:
			return fmt.Errorf("registration timeout")
		}
	}
}
// SendHeartbeat queues a heartbeat describing the worker's current status.
// The workerID argument is not used; the message carries the ID the client
// was created with.
//
// If the client is disconnected it waits up to ten seconds for a reconnect.
// It returns an error if status is nil, if no connection becomes available,
// or if the message cannot be queued within one second.
func (c *GrpcAdminClient) SendHeartbeat(workerID string, status *types.WorkerStatus) error {
	if status == nil {
		return fmt.Errorf("worker status is nil")
	}

	// Read connected under the mutex; the stream handlers write it concurrently.
	c.mutex.RLock()
	connected := c.connected
	c.mutex.RUnlock()
	if !connected {
		// Wait for reconnection for a short time.
		if err := c.waitForConnection(10 * time.Second); err != nil {
			return fmt.Errorf("not connected to admin server: %v", err)
		}
	}

	taskIds := make([]string, len(status.CurrentTasks))
	for i, task := range status.CurrentTasks {
		taskIds[i] = task.ID
	}

	msg := &worker_pb.WorkerMessage{
		WorkerId:  c.workerID,
		Timestamp: time.Now().Unix(),
		Message: &worker_pb.WorkerMessage_Heartbeat{
			Heartbeat: &worker_pb.WorkerHeartbeat{
				WorkerId:       c.workerID,
				Status:         status.Status,
				CurrentLoad:    int32(status.CurrentLoad),
				MaxConcurrent:  int32(status.MaxConcurrent),
				CurrentTaskIds: taskIds,
				TasksCompleted: int32(status.TasksCompleted),
				TasksFailed:    int32(status.TasksFailed),
				UptimeSeconds:  int64(status.Uptime.Seconds()),
			},
		},
	}

	select {
	case c.outgoing <- msg:
		return nil
	case <-time.After(time.Second):
		return fmt.Errorf("failed to send heartbeat: timeout")
	}
}
// RequestTask asks the admin server for one task matching the given
// capabilities and waits up to five seconds for an assignment. The workerID
// argument is not used; the request carries the ID the client was created
// with.
//
// It returns (nil, nil) when no assignment arrives in time. Messages read
// from the incoming queue that are not task assignments are discarded. It
// returns an error if no connection becomes available within five seconds,
// if the request cannot be queued within one second, or if the incoming
// queue is closed.
func (c *GrpcAdminClient) RequestTask(workerID string, capabilities []types.TaskType) (*types.Task, error) {
	// Read connected under the mutex; the stream handlers write it concurrently.
	c.mutex.RLock()
	connected := c.connected
	c.mutex.RUnlock()
	if !connected {
		// Wait for reconnection for a short time.
		if err := c.waitForConnection(5 * time.Second); err != nil {
			return nil, fmt.Errorf("not connected to admin server: %v", err)
		}
	}

	caps := make([]string, len(capabilities))
	for i, cap := range capabilities {
		caps[i] = string(cap)
	}

	msg := &worker_pb.WorkerMessage{
		WorkerId:  c.workerID,
		Timestamp: time.Now().Unix(),
		Message: &worker_pb.WorkerMessage_TaskRequest{
			TaskRequest: &worker_pb.TaskRequest{
				WorkerId:       c.workerID,
				Capabilities:   caps,
				AvailableSlots: 1, // Request one task
			},
		},
	}

	select {
	case c.outgoing <- msg:
	case <-time.After(time.Second):
		return nil, fmt.Errorf("failed to send task request: timeout")
	}

	// Wait for the task assignment.
	timeout := time.NewTimer(5 * time.Second)
	defer timeout.Stop()

	for {
		select {
		case response, ok := <-c.incoming:
			if !ok {
				// A closed queue yields nil forever; bail out instead of
				// spinning until the timer fires.
				return nil, fmt.Errorf("failed to receive task assignment: connection closed")
			}
			taskAssign := response.GetTaskAssignment()
			if taskAssign == nil {
				continue
			}

			// The generated getters tolerate a nil receiver, so an
			// assignment without params yields zero values, not a panic.
			params := taskAssign.GetParams()

			// Convert parameters map[string]string to map[string]interface{}.
			parameters := make(map[string]interface{})
			for k, v := range params.GetParameters() {
				parameters[k] = v
			}

			// Convert to our task type.
			task := &types.Task{
				ID:         taskAssign.TaskId,
				Type:       types.TaskType(taskAssign.TaskType),
				Status:     types.TaskStatusAssigned,
				VolumeID:   params.GetVolumeId(),
				Server:     params.GetServer(),
				Collection: params.GetCollection(),
				Priority:   types.TaskPriority(taskAssign.Priority),
				CreatedAt:  time.Unix(taskAssign.CreatedTime, 0),
				Parameters: parameters,
			}
			return task, nil
		case <-timeout.C:
			return nil, nil // No task available
		}
	}
}
// CompleteTask queues a completion report for taskID, carrying the success
// flag and error message.
//
// If the client is disconnected it waits up to five seconds for a reconnect.
// It returns an error if no connection becomes available or if the message
// cannot be queued within one second.
func (c *GrpcAdminClient) CompleteTask(taskID string, success bool, errorMsg string) error {
	// Read connected under the mutex; the stream handlers write it concurrently.
	c.mutex.RLock()
	connected := c.connected
	c.mutex.RUnlock()
	if !connected {
		// Wait for reconnection for a short time.
		if err := c.waitForConnection(5 * time.Second); err != nil {
			return fmt.Errorf("not connected to admin server: %v", err)
		}
	}

	msg := &worker_pb.WorkerMessage{
		WorkerId:  c.workerID,
		Timestamp: time.Now().Unix(),
		Message: &worker_pb.WorkerMessage_TaskComplete{
			TaskComplete: &worker_pb.TaskComplete{
				TaskId:         taskID,
				WorkerId:       c.workerID,
				Success:        success,
				ErrorMessage:   errorMsg,
				CompletionTime: time.Now().Unix(),
			},
		},
	}

	select {
	case c.outgoing <- msg:
		return nil
	case <-time.After(time.Second):
		return fmt.Errorf("failed to send task completion: timeout")
	}
}
// UpdateTaskProgress queues an "in_progress" update for taskID with the
// given progress value (narrowed to float32 on the wire).
//
// If the client is disconnected it waits up to five seconds for a reconnect.
// It returns an error if no connection becomes available or if the message
// cannot be queued within one second.
func (c *GrpcAdminClient) UpdateTaskProgress(taskID string, progress float64) error {
	// Read connected under the mutex; the stream handlers write it concurrently.
	c.mutex.RLock()
	connected := c.connected
	c.mutex.RUnlock()
	if !connected {
		// Wait for reconnection for a short time.
		if err := c.waitForConnection(5 * time.Second); err != nil {
			return fmt.Errorf("not connected to admin server: %v", err)
		}
	}

	msg := &worker_pb.WorkerMessage{
		WorkerId:  c.workerID,
		Timestamp: time.Now().Unix(),
		Message: &worker_pb.WorkerMessage_TaskUpdate{
			TaskUpdate: &worker_pb.TaskUpdate{
				TaskId:   taskID,
				WorkerId: c.workerID,
				Status:   "in_progress",
				Progress: float32(progress),
			},
		},
	}

	select {
	case c.outgoing <- msg:
		return nil
	case <-time.After(time.Second):
		return fmt.Errorf("failed to send task progress: timeout")
	}
}
// IsConnected reports the current value of the connected flag.
func (c *GrpcAdminClient) IsConnected() bool {
	c.mutex.RLock()
	state := c.connected
	c.mutex.RUnlock()
	return state
}
// IsReconnecting reports whether a reconnection attempt is in progress.
func (c *GrpcAdminClient) IsReconnecting() bool {
	c.mutex.RLock()
	state := c.reconnecting
	c.mutex.RUnlock()
	return state
}
// SetReconnectionSettings replaces all four reconnection parameters at once:
// the attempt limit (0 means unlimited), the initial and maximum backoff,
// and the factor applied to the backoff after each failed attempt.
func (c *GrpcAdminClient) SetReconnectionSettings(maxAttempts int, initialBackoff, maxBackoff time.Duration, multiplier float64) {
	c.mutex.Lock()
	c.maxReconnectAttempts, c.reconnectMultiplier = maxAttempts, multiplier
	c.reconnectBackoff, c.maxReconnectBackoff = initialBackoff, maxBackoff
	c.mutex.Unlock()
}
// StopReconnection clears the shouldReconnect flag so that no further
// reconnection attempts are started. It does not terminate the loop itself.
func (c *GrpcAdminClient) StopReconnection() {
	c.mutex.Lock()
	c.shouldReconnect = false
	c.mutex.Unlock()
}
// StartReconnection sets the shouldReconnect flag so that the reconnection
// loop may attempt reconnects again. It only flips the flag; it does not
// start a new loop goroutine.
func (c *GrpcAdminClient) StartReconnection() {
	c.mutex.Lock()
	c.shouldReconnect = true
	c.mutex.Unlock()
}
// waitForConnection polls every 100ms until the client is connected
// (returns nil), reconnection has been disabled (returns an error), or the
// timeout has elapsed (returns an error).
func (c *GrpcAdminClient) waitForConnection(timeout time.Duration) error {
	for deadline := time.Now().Add(timeout); time.Now().Before(deadline); time.Sleep(100 * time.Millisecond) {
		c.mutex.RLock()
		up, retrying := c.connected, c.shouldReconnect
		c.mutex.RUnlock()

		switch {
		case up:
			return nil
		case !retrying:
			// Nothing will bring the link back, so waiting is pointless.
			return fmt.Errorf("reconnection is disabled")
		}
	}
	return fmt.Errorf("timeout waiting for connection")
}
// MockAdminClient is an in-memory stand-in for an admin client, for tests.
type MockAdminClient struct {
	workerID  string        // ID of the last worker passed to RegisterWorker
	connected bool          // toggled by Connect and Disconnect
	tasks     []*types.Task // FIFO queue served by RequestTask
	mutex     sync.RWMutex
}
// NewMockAdminClient returns a mock that starts out connected and has an
// empty task queue.
func NewMockAdminClient() *MockAdminClient {
	mock := new(MockAdminClient)
	mock.connected = true
	mock.tasks = []*types.Task{}
	return mock
}
// Connect marks the mock as connected. It always returns nil.
func (m *MockAdminClient) Connect() error {
	m.mutex.Lock()
	m.connected = true
	m.mutex.Unlock()
	return nil
}
// Disconnect marks the mock as disconnected. It always returns nil.
func (m *MockAdminClient) Disconnect() error {
	m.mutex.Lock()
	m.connected = false
	m.mutex.Unlock()
	return nil
}
// RegisterWorker records the worker's ID on the mock and logs the
// registration. It always returns nil.
func (m *MockAdminClient) RegisterWorker(worker *types.Worker) error {
	// Guard the write with the same mutex the other state-changing methods
	// use, so concurrent use of the mock does not race.
	m.mutex.Lock()
	m.workerID = worker.ID
	m.mutex.Unlock()

	glog.Infof("Mock: Worker %s registered with capabilities: %v", worker.ID, worker.Capabilities)
	return nil
}
// SendHeartbeat logs the heartbeat at verbosity level 2 and returns nil.
func (m *MockAdminClient) SendHeartbeat(workerID string, status *types.WorkerStatus) error {
	verbose := glog.V(2)
	verbose.Infof("Mock: Heartbeat from worker %s, status: %s, load: %d/%d",
		workerID, status.Status, status.CurrentLoad, status.MaxConcurrent)
	return nil
}
// RequestTask pops and returns the oldest queued mock task, or (nil, nil)
// when the queue is empty. The capabilities argument is ignored.
func (m *MockAdminClient) RequestTask(workerID string, capabilities []types.TaskType) (*types.Task, error) {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	if len(m.tasks) == 0 {
		// No tasks available.
		return nil, nil
	}

	next, rest := m.tasks[0], m.tasks[1:]
	m.tasks = rest
	glog.Infof("Mock: Assigned task %s to worker %s", next.ID, workerID)
	return next, nil
}
// CompleteTask logs the outcome of the task and returns nil.
func (m *MockAdminClient) CompleteTask(taskID string, success bool, errorMsg string) error {
	if !success {
		glog.Infof("Mock: Task %s failed: %s", taskID, errorMsg)
		return nil
	}
	glog.Infof("Mock: Task %s completed successfully", taskID)
	return nil
}
// UpdateTaskProgress logs the progress value at verbosity level 2 and
// returns nil.
func (m *MockAdminClient) UpdateTaskProgress(taskID string, progress float64) error {
	verbose := glog.V(2)
	verbose.Infof("Mock: Task %s progress: %.1f%%", taskID, progress)
	return nil
}
// IsConnected reports whether the mock is currently marked connected.
func (m *MockAdminClient) IsConnected() bool {
	m.mutex.RLock()
	state := m.connected
	m.mutex.RUnlock()
	return state
}
// AddMockTask appends task to the queue that RequestTask serves from.
func (m *MockAdminClient) AddMockTask(task *types.Task) {
	m.mutex.Lock()
	m.tasks = append(m.tasks, task)
	m.mutex.Unlock()
}
// CreateAdminClient returns a gRPC-backed AdminClient for the given admin
// server, worker ID, and dial option. The returned error is always nil.
func CreateAdminClient(adminServer string, workerID string, dialOption grpc.DialOption) (AdminClient, error) {
	var client AdminClient = NewGrpcAdminClient(adminServer, workerID, dialOption)
	return client, nil
}

111
weed/worker/client_test.go Normal file
View File

@@ -0,0 +1,111 @@
package worker
import (
	"context"
	"strings"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)
// TestGrpcConnection checks that dialing with insecure credentials never
// fails with gRPC's "no transport security set" error. No server needs to be
// running: any other dial failure is logged and tolerated.
func TestGrpcConnection(t *testing.T) {
	adminAddress := "localhost:33646" // gRPC port for admin server on port 23646

	conn, err := pb.GrpcDial(context.Background(), adminAddress, false, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		// Match on a substring: the error text may carry extra guidance or
		// be wrapped, so an exact comparison would never fire.
		if strings.Contains(err.Error(), "no transport security set") {
			t.Fatalf("Transport security error should not occur with insecure credentials: %v", err)
		}
		// Connection failure is expected when no server is running.
		t.Logf("Connection failed as expected (no server running): %v", err)
		return
	}

	// If the connection succeeds, clean up.
	conn.Close()
	t.Log("Connection succeeded")
}
// TestGrpcAdminClient_Connect checks that Connect with insecure credentials
// never fails with gRPC's "no transport security set" error. No server needs
// to be running: any other connection failure is logged and tolerated.
func TestGrpcAdminClient_Connect(t *testing.T) {
	dialOption := grpc.WithTransportCredentials(insecure.NewCredentials())
	client := NewGrpcAdminClient("localhost:23646", "test-worker", dialOption)

	err := client.Connect()
	if err != nil {
		// Connect prefixes the underlying error with its own context, so
		// look for the gRPC message as a substring rather than comparing
		// the whole string.
		if strings.Contains(err.Error(), "no transport security set") {
			t.Fatalf("Transport security error should not occur with insecure credentials: %v", err)
		}
		// Connection failure is expected when no server is running.
		t.Logf("Connection failed as expected (no server running): %v", err)
		return
	}

	// If the connection succeeds, clean up.
	client.Disconnect()
	t.Log("Connection succeeded")
}
func TestAdminAddressToGrpcAddress(t *testing.T) {
tests := []struct {
adminAddress string
expected string
}{
{"localhost:9333", "localhost:19333"},
{"localhost:23646", "localhost:33646"},
{"admin.example.com:9333", "admin.example.com:19333"},
{"127.0.0.1:8080", "127.0.0.1:18080"},
}
for _, test := range tests {
dialOption := grpc.WithTransportCredentials(insecure.NewCredentials())
client := NewGrpcAdminClient(test.adminAddress, "test-worker", dialOption)
result := client.adminAddress
if result != test.expected {
t.Errorf("For admin address %s, expected gRPC address %s, got %s",
test.adminAddress, test.expected, result)
}
}
}
// TestMockAdminClient exercises the mock's connect/disconnect cycle and the
// state IsConnected reports after each step.
func TestMockAdminClient(t *testing.T) {
	mock := NewMockAdminClient()

	// Connecting must succeed and leave the mock connected.
	if connectErr := mock.Connect(); connectErr != nil {
		t.Fatalf("Mock client connect failed: %v", connectErr)
	}
	if connected := mock.IsConnected(); !connected {
		t.Error("Mock client should be connected")
	}

	// Disconnecting must succeed and leave the mock disconnected.
	if disconnectErr := mock.Disconnect(); disconnectErr != nil {
		t.Fatalf("Mock client disconnect failed: %v", disconnectErr)
	}
	if connected := mock.IsConnected(); connected {
		t.Error("Mock client should be disconnected")
	}
}
// TestCreateAdminClient verifies that the factory returns a non-nil client
// and no error.
func TestCreateAdminClient(t *testing.T) {
	insecureOpt := grpc.WithTransportCredentials(insecure.NewCredentials())

	created, createErr := CreateAdminClient("localhost:9333", "test-worker", insecureOpt)
	switch {
	case createErr != nil:
		t.Fatalf("Failed to create admin client: %v", createErr)
	case created == nil:
		t.Fatal("Client should not be nil")
	}
}

View File

@@ -0,0 +1,146 @@
package worker
import (
"strings"
"testing"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// TestGrpcClientTLSDetection verifies that the constructor keeps the dial
// option it was given.
func TestGrpcClientTLSDetection(t *testing.T) {
	insecureOpt := grpc.WithTransportCredentials(insecure.NewCredentials())
	grpcClient := NewGrpcAdminClient("localhost:33646", "test-worker", insecureOpt)

	if stored := grpcClient.dialOption; stored == nil {
		t.Error("Client should have a dial option")
	}

	t.Logf("Client created successfully with dial option")
}
// TestCreateAdminClientGrpc verifies that the factory returns a
// *GrpcAdminClient carrying the port-shifted address and the worker ID.
func TestCreateAdminClientGrpc(t *testing.T) {
	insecureOpt := grpc.WithTransportCredentials(insecure.NewCredentials())

	created, createErr := CreateAdminClient("localhost:23646", "test-worker", insecureOpt)
	if createErr != nil {
		t.Fatalf("Failed to create admin client: %v", createErr)
	}
	if created == nil {
		t.Fatal("Client should not be nil")
	}

	// The factory must hand back the gRPC implementation.
	concrete, isGrpc := created.(*GrpcAdminClient)
	if !isGrpc {
		t.Fatal("Client should be GrpcAdminClient type")
	}

	// The stored address is the HTTP port + 10000: 23646 -> 33646.
	const wantAddress = "localhost:33646"
	if got := concrete.adminAddress; got != wantAddress {
		t.Errorf("Expected admin address %s, got %s", wantAddress, got)
	}

	if got := concrete.workerID; got != "test-worker" {
		t.Errorf("Expected worker ID test-worker, got %s", got)
	}
}
// TestConnectionTimeouts verifies that connecting to an address where
// nothing is expected to be listening fails, and that the failure is
// reported within ten seconds.
func TestConnectionTimeouts(t *testing.T) {
	dialOption := grpc.WithTransportCredentials(insecure.NewCredentials())
	// The constructor shifts the port by 10000, so the client actually dials
	// localhost:10001; the test assumes no gRPC server is listening there.
	client := NewGrpcAdminClient("localhost:1", "test-worker", dialOption)

	start := time.Now()
	err := client.Connect() // expected to fail while establishing the stream
	duration := time.Since(start)

	if err == nil {
		t.Error("Expected connection to closed port to fail")
		// Release the connection and background goroutines that a
		// successful Connect started.
		client.Disconnect()
	} else {
		t.Logf("Connection failed as expected: %v", err)
	}

	// The attempt must not hang well past the client's dial timeout.
	if duration > 10*time.Second {
		t.Errorf("Connection attempt took too long: %v", duration)
	}
}
// TestConnectionWithDialOption connects to an address that should refuse the
// connection and logs the resulting error. A message that mentions neither
// "connection" nor "dial" is logged but does not fail the test.
func TestConnectionWithDialOption(t *testing.T) {
	insecureOpt := grpc.WithTransportCredentials(insecure.NewCredentials())
	grpcClient := NewGrpcAdminClient("localhost:1", "test-worker", insecureOpt) // Port 1 is reserved and won't be open

	connectErr := grpcClient.Connect()
	if connectErr == nil {
		t.Error("Expected connection to closed port to fail")
		grpcClient.Disconnect() // Clean up if it somehow succeeded
		return
	}
	t.Logf("Connection failed as expected: %v", connectErr)

	// Inspect the message only when there is one.
	message := connectErr.Error()
	if message == "" {
		return
	}
	t.Logf("Connection error message: %s", message)

	mentionsConnection := strings.Contains(message, "connection") || strings.Contains(message, "dial")
	if !mentionsConnection {
		t.Logf("Error message doesn't indicate connection issues: %s", message)
	}
}
// TestClientWithSecureDialOption verifies that the dial option is stored on
// the client. Insecure credentials stand in for a TLS option here.
func TestClientWithSecureDialOption(t *testing.T) {
	standInOpt := grpc.WithTransportCredentials(insecure.NewCredentials())
	grpcClient := NewGrpcAdminClient("localhost:33646", "test-worker", standInOpt)

	if stored := grpcClient.dialOption; stored == nil {
		t.Error("Client should have a dial option")
	}

	t.Logf("Client created successfully with dial option")
}
// TestConnectionWithRealAddress tries to connect to a public host that is
// not expected to speak gRPC and logs the outcome either way. Because it
// needs external DNS and network access, it is skipped in -short mode.
func TestConnectionWithRealAddress(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test that requires external network access in short mode")
	}

	dialOption := grpc.WithTransportCredentials(insecure.NewCredentials())
	// The constructor shifts the port by 10000, so this dials port 10080.
	client := NewGrpcAdminClient("www.google.com:80", "test-worker", dialOption)

	err := client.Connect()
	if err == nil {
		t.Log("Connection succeeded unexpectedly")
		client.Disconnect()
	} else {
		t.Logf("Connection failed as expected: %v", err)
	}
}
// TestDialOptionUsage verifies that the dial option is stored on the client
// and that connecting to an address with no gRPC server fails.
func TestDialOptionUsage(t *testing.T) {
	insecureOpt := grpc.WithTransportCredentials(insecure.NewCredentials())
	grpcClient := NewGrpcAdminClient("localhost:1", "test-worker", insecureOpt) // Port 1 won't support gRPC at all

	// The option must be retained by the constructor.
	if stored := grpcClient.dialOption; stored == nil {
		t.Error("Dial option should be stored in client")
	}

	// Connecting must fail; clean up if it unexpectedly succeeds.
	connectErr := grpcClient.Connect()
	if connectErr != nil {
		t.Logf("Connection failed as expected: %v", connectErr)
		return
	}
	t.Error("Connection should fail to non-gRPC port")
	grpcClient.Disconnect()
}

348
weed/worker/registry.go Normal file
View File

@@ -0,0 +1,348 @@
package worker
import (
"fmt"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Registry tracks the known workers by ID together with aggregate
// statistics about them.
type Registry struct {
	workers map[string]*types.Worker // keyed by worker ID
	stats   *types.RegistryStats     // refreshed by updateStats
	mutex   sync.RWMutex             // taken by every exported method
}
// NewRegistry returns an empty registry whose statistics start at zero and
// whose StartTime is the moment of creation.
func NewRegistry() *Registry {
	registry := new(Registry)
	registry.workers = map[string]*types.Worker{}
	// All counters rely on their zero value; only the start time is set.
	registry.stats = &types.RegistryStats{StartTime: time.Now()}
	return registry
}
// RegisterWorker adds worker to the registry and refreshes the statistics.
// It returns an error if worker is nil or if a worker with the same ID is
// already registered.
func (r *Registry) RegisterWorker(worker *types.Worker) error {
	// Reject nil up front: it would otherwise panic on worker.ID below.
	if worker == nil {
		return fmt.Errorf("cannot register nil worker")
	}

	r.mutex.Lock()
	defer r.mutex.Unlock()

	if _, exists := r.workers[worker.ID]; exists {
		return fmt.Errorf("worker %s already registered", worker.ID)
	}

	r.workers[worker.ID] = worker
	r.updateStats()
	return nil
}
// UnregisterWorker deletes the worker with the given ID and refreshes the
// statistics. It returns an error if no such worker is registered.
func (r *Registry) UnregisterWorker(workerID string) error {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	_, known := r.workers[workerID]
	if known {
		delete(r.workers, workerID)
		r.updateStats()
		return nil
	}
	return fmt.Errorf("worker %s not found", workerID)
}
// GetWorker looks up a worker by ID. The boolean reports whether it was
// found; the returned pointer is the registry's own entry, not a copy.
func (r *Registry) GetWorker(workerID string) (*types.Worker, bool) {
	r.mutex.RLock()
	found, ok := r.workers[workerID]
	r.mutex.RUnlock()
	return found, ok
}
// ListWorkers returns a new slice holding every registered worker, in map
// iteration order (i.e. unspecified).
func (r *Registry) ListWorkers() []*types.Worker {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	list := make([]*types.Worker, len(r.workers))
	next := 0
	for _, w := range r.workers {
		list[next] = w
		next++
	}
	return list
}
// GetWorkersByCapability returns the workers whose capability list contains
// the given task type. The result is nil when none match.
func (r *Registry) GetWorkersByCapability(capability types.TaskType) []*types.Worker {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	var matched []*types.Worker
scan:
	for _, w := range r.workers {
		for _, have := range w.Capabilities {
			if have == capability {
				matched = append(matched, w)
				// One match is enough; move on to the next worker.
				continue scan
			}
		}
	}
	return matched
}
// GetAvailableWorkers returns the workers whose status is "active" and whose
// current load is below their concurrency limit. The result is nil when
// none qualify.
func (r *Registry) GetAvailableWorkers() []*types.Worker {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	var available []*types.Worker
	for _, w := range r.workers {
		if w.Status != "active" {
			continue
		}
		if w.CurrentLoad >= w.MaxConcurrent {
			continue
		}
		available = append(available, w)
	}
	return available
}
// GetBestWorkerForTask picks, among "active" workers that support taskType
// and have spare capacity, the one with the largest fraction of free slots.
// It returns nil when no worker qualifies. Ties are resolved by map
// iteration order.
func (r *Registry) GetBestWorkerForTask(taskType types.TaskType) *types.Worker {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	// supports reports whether the worker lists taskType as a capability.
	supports := func(w *types.Worker) bool {
		for _, have := range w.Capabilities {
			if have == taskType {
				return true
			}
		}
		return false
	}

	var (
		winner       *types.Worker
		winningScore float64
	)
	for _, candidate := range r.workers {
		if !supports(candidate) {
			continue
		}

		hasCapacity := candidate.Status == "active" && candidate.CurrentLoad < candidate.MaxConcurrent
		if !hasCapacity {
			continue
		}

		// Score is the share of the worker's slots that are still free.
		free := candidate.MaxConcurrent - candidate.CurrentLoad
		score := float64(free) / float64(candidate.MaxConcurrent)
		if winner == nil || score > winningScore {
			winner, winningScore = candidate, score
		}
	}
	return winner
}
// UpdateWorkerHeartbeat stamps the worker's LastHeartbeat with the current
// time. It returns an error if the worker is not registered.
func (r *Registry) UpdateWorkerHeartbeat(workerID string) error {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	if target, known := r.workers[workerID]; known {
		target.LastHeartbeat = time.Now()
		return nil
	}
	return fmt.Errorf("worker %s not found", workerID)
}
// UpdateWorkerLoad records a worker's current load and derives its status from
// it: "busy" once the load reaches MaxConcurrent, "active" otherwise. Registry
// statistics are refreshed afterwards. An unknown worker ID yields an error.
func (r *Registry) UpdateWorkerLoad(workerID string, load int) error {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	w, found := r.workers[workerID]
	if !found {
		return fmt.Errorf("worker %s not found", workerID)
	}

	w.CurrentLoad = load
	w.Status = "active"
	if load >= w.MaxConcurrent {
		w.Status = "busy"
	}

	r.updateStats()
	return nil
}
// UpdateWorkerStatus overwrites the status string of the given worker and
// refreshes the registry statistics. An unknown worker ID yields an error.
func (r *Registry) UpdateWorkerStatus(workerID string, status string) error {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	w, found := r.workers[workerID]
	if !found {
		return fmt.Errorf("worker %s not found", workerID)
	}

	w.Status = status
	r.updateStats()
	return nil
}
// CleanupStaleWorkers drops every worker whose last heartbeat is older than
// timeout and reports how many were removed. Statistics are refreshed only
// when at least one worker was dropped.
func (r *Registry) CleanupStaleWorkers(timeout time.Duration) int {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	deadline := time.Now().Add(-timeout)
	before := len(r.workers)
	for id, w := range r.workers {
		if !w.LastHeartbeat.Before(deadline) {
			continue
		}
		delete(r.workers, id)
	}

	removed := before - len(r.workers)
	if removed > 0 {
		r.updateStats()
	}
	return removed
}
// GetStats returns a snapshot copy of the registry statistics.
//
// The worker counters and LastUpdated reflect the most recent updateStats
// call. Uptime is recomputed on the copy at call time, because the stored
// value is only refreshed when the registry is mutated and would otherwise be
// stale between mutations.
func (r *Registry) GetStats() *types.RegistryStats {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	// Copy so callers never share memory with the registry's own stats.
	snapshot := *r.stats
	snapshot.Uptime = time.Since(snapshot.StartTime)
	return &snapshot
}
// updateStats recomputes the aggregate counters from the worker map and
// refreshes Uptime and LastUpdated. The caller must already hold the write
// lock.
//
// Workers with status "busy" count as busy. Workers with status "active" count
// as active when they have load and idle when they have none. Any other status
// is included only in TotalWorkers.
func (r *Registry) updateStats() {
	s := r.stats
	s.TotalWorkers = len(r.workers)
	s.ActiveWorkers, s.BusyWorkers, s.IdleWorkers = 0, 0, 0

	for _, w := range r.workers {
		if w.Status == "busy" {
			s.BusyWorkers++
			continue
		}
		if w.Status != "active" {
			continue
		}
		if w.CurrentLoad > 0 {
			s.ActiveWorkers++
		} else {
			s.IdleWorkers++
		}
	}

	s.Uptime = time.Since(s.StartTime)
	s.LastUpdated = time.Now()
}
// GetTaskCapabilities returns the distinct task types offered by at least one
// registered worker. The order of the result is unspecified (map iteration);
// the result is nil when no worker advertises any capability.
func (r *Registry) GetTaskCapabilities() []types.TaskType {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	seen := make(map[types.TaskType]struct{})
	for _, w := range r.workers {
		for _, c := range w.Capabilities {
			seen[c] = struct{}{}
		}
	}

	var distinct []types.TaskType
	for c := range seen {
		distinct = append(distinct, c)
	}
	return distinct
}
// GetWorkersByStatus returns the workers whose Status equals the given string
// exactly. The result is nil when none match.
func (r *Registry) GetWorkersByStatus(status string) []*types.Worker {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	var matched []*types.Worker
	for _, w := range r.workers {
		if w.Status != status {
			continue
		}
		matched = append(matched, w)
	}
	return matched
}
// GetWorkerCount reports how many workers are currently registered.
func (r *Registry) GetWorkerCount() int {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	count := len(r.workers)
	return count
}
// GetWorkerIDs returns the IDs (map keys) of all registered workers in
// unspecified order. The result is an empty, non-nil slice when the registry
// is empty.
func (r *Registry) GetWorkerIDs() []string {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	ids := make([]string, len(r.workers))
	next := 0
	for id := range r.workers {
		ids[next] = id
		next++
	}
	return ids
}
// GetWorkerSummary aggregates the registry into a WorkerSummary: total worker
// count, workers per status, workers per capability, and the summed current
// load and maximum concurrency across all workers.
func (r *Registry) GetWorkerSummary() *types.WorkerSummary {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	out := &types.WorkerSummary{
		TotalWorkers: len(r.workers),
		ByStatus:     map[string]int{},
		ByCapability: map[types.TaskType]int{},
	}
	for _, w := range r.workers {
		out.TotalLoad += w.CurrentLoad
		out.MaxCapacity += w.MaxConcurrent
		out.ByStatus[w.Status]++
		for _, c := range w.Capabilities {
			out.ByCapability[c]++
		}
	}
	return out
}
// Process-wide registry and the guard that ensures it is built exactly once.
var (
	defaultRegistry *Registry
	registryOnce    sync.Once
)

// GetDefaultRegistry returns the shared registry, creating it with NewRegistry
// on the first call. Every call returns the same instance.
func GetDefaultRegistry() *Registry {
	registryOnce.Do(func() { defaultRegistry = NewRegistry() })
	return defaultRegistry
}

View File

@@ -0,0 +1,82 @@
package balance
import (
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Task implements balance operation to redistribute volumes across volume servers.
// It embeds *tasks.BaseTask and carries the target captured by NewTask.
type Task struct {
*tasks.BaseTask
// server is the server value passed to NewTask; Execute only uses it in log messages.
server string
// volumeID is the volume value passed to NewTask; Execute only uses it in log messages.
volumeID uint32
// collection is the collection value passed to NewTask; Execute only uses it in log messages.
collection string
}
// NewTask builds a balance Task for the given server, volume and collection,
// backed by a fresh BaseTask of type TaskTypeBalance.
func NewTask(server string, volumeID uint32, collection string) *Task {
	return &Task{
		BaseTask:   tasks.NewBaseTask(types.TaskTypeBalance),
		server:     server,
		volumeID:   volumeID,
		collection: collection,
	}
}
// Execute runs the balance task. The current implementation is a simulation:
// it walks a fixed list of phases, publishes each phase's progress value and
// sleeps for the phase duration. Cancellation is checked before each phase and
// reported as an error. params is not consulted.
func (t *Task) Execute(params types.TaskParams) error {
	glog.Infof("Starting balance task for volume %d on server %s (collection: %s)", t.volumeID, t.server, t.collection)

	type phase struct {
		label   string
		pause   time.Duration
		percent float64
	}
	phases := []phase{
		{label: "Analyzing cluster state", pause: 2 * time.Second, percent: 15},
		{label: "Identifying optimal placement", pause: 3 * time.Second, percent: 35},
		{label: "Moving volume data", pause: 6 * time.Second, percent: 75},
		{label: "Updating cluster metadata", pause: 2 * time.Second, percent: 95},
		{label: "Verifying balance", pause: 1 * time.Second, percent: 100},
	}

	for i := range phases {
		if t.IsCancelled() {
			return fmt.Errorf("balance task cancelled")
		}

		current := phases[i]
		glog.V(1).Infof("Balance task step: %s", current.label)
		t.SetProgress(current.percent)

		// Stand-in for the real work of this phase.
		time.Sleep(current.pause)
	}

	glog.Infof("Balance task completed for volume %d on server %s", t.volumeID, t.server)
	return nil
}
// Validate checks that params names a volume (non-zero VolumeID) and a server
// (non-empty Server), returning an error for the first missing one.
func (t *Task) Validate(params types.TaskParams) error {
	switch {
	case params.VolumeID == 0:
		return fmt.Errorf("volume_id is required")
	case params.Server == "":
		return fmt.Errorf("server is required")
	}
	return nil
}
// EstimateTime returns the expected duration of a balance task. It is
// currently a fixed 35 seconds regardless of params.
func (t *Task) EstimateTime(params types.TaskParams) time.Duration {
	// Flat estimate; volume size or cluster state could refine it later.
	const estimate = 35 * time.Second
	return estimate
}

View File

@@ -0,0 +1,171 @@
package balance
import (
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// BalanceDetector implements TaskDetector for balance tasks.
// It holds the tunable detection settings plus the time of the last scan that
// passed the rate limit in ScanForTasks.
type BalanceDetector struct {
// enabled gates ScanForTasks; when false the scan returns immediately with no tasks.
enabled bool
// threshold is compared against (max-min)/average volumes per server.
threshold float64 // Imbalance threshold (0.1 = 10%)
// minCheckInterval is the minimum time between scans and is also what ScanInterval returns.
minCheckInterval time.Duration
// minVolumeCount is the number of volume metrics below which ScanForTasks skips balancing.
minVolumeCount int
// lastCheck is set by ScanForTasks each time a scan passes the interval check.
lastCheck time.Time
}
// Compile-time check that *BalanceDetector satisfies types.TaskDetector.
var _ types.TaskDetector = (*BalanceDetector)(nil)
// NewBalanceDetector returns an enabled detector with the default settings:
// 10% imbalance threshold, hourly checks, and a minimum of 10 volumes.
// lastCheck starts at the zero time.
func NewBalanceDetector() *BalanceDetector {
	d := new(BalanceDetector)
	d.enabled = true
	d.threshold = 0.1 // 10% imbalance threshold
	d.minCheckInterval = 1 * time.Hour
	d.minVolumeCount = 10 // don't balance small clusters
	return d
}
// GetTaskType identifies this detector as handling types.TaskTypeBalance.
func (d *BalanceDetector) GetTaskType() types.TaskType {
	return types.TaskTypeBalance
}
// ScanForTasks decides whether the cluster needs a balance task.
//
// It returns no result when the detector is disabled, when the previous scan
// was less than minCheckInterval ago, when fewer than minVolumeCount metrics
// are supplied, or when the metrics cover fewer than two servers. Otherwise it
// counts volumes per server and computes (max - min) / average; if that ratio
// exceeds the threshold, a single normal-priority balance task describing the
// imbalance is returned. clusterInfo is not consulted. The returned error is
// always nil.
func (d *BalanceDetector) ScanForTasks(volumeMetrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterInfo) ([]*types.TaskDetectionResult, error) {
	if !d.enabled {
		return nil, nil
	}

	glog.V(2).Infof("Scanning for balance tasks...")

	// Rate limit: skip if the last scan was too recent.
	if time.Since(d.lastCheck) < d.minCheckInterval {
		return nil, nil
	}
	d.lastCheck = time.Now()

	volumeTotal := len(volumeMetrics)
	if volumeTotal < d.minVolumeCount {
		glog.V(2).Infof("Cluster too small for balance (%d volumes < %d minimum)", volumeTotal, d.minVolumeCount)
		return nil, nil
	}

	// Tally volumes per server.
	perServer := make(map[string]int)
	for _, m := range volumeMetrics {
		perServer[m.Server]++
	}
	serverTotal := len(perServer)
	if serverTotal < 2 {
		glog.V(2).Infof("Not enough servers for balance (%d servers)", serverTotal)
		return nil, nil
	}

	// Find the most and least loaded servers.
	mean := float64(volumeTotal) / float64(serverTotal)
	var busiest, emptiest string
	most, fewest := 0, volumeTotal
	for name, n := range perServer {
		if n > most {
			most, busiest = n, name
		}
		if n < fewest {
			fewest, emptiest = n, name
		}
	}

	ratio := float64(most-fewest) / mean
	if ratio <= d.threshold {
		glog.V(2).Infof("Cluster is balanced (imbalance ratio: %.2f <= %.2f)", ratio, d.threshold)
		return nil, nil
	}

	reason := fmt.Sprintf("Cluster imbalance detected: %.1f%% (max: %d on %s, min: %d on %s, avg: %.1f)",
		ratio*100, most, busiest, fewest, emptiest, mean)

	details := map[string]interface{}{
		"imbalance_ratio":        ratio,
		"threshold":              d.threshold,
		"max_volumes":            most,
		"min_volumes":            fewest,
		"avg_volumes_per_server": mean,
		"max_server":             busiest,
		"min_server":             emptiest,
		"total_servers":          serverTotal,
	}
	found := &types.TaskDetectionResult{
		TaskType:   types.TaskTypeBalance,
		Priority:   types.TaskPriorityNormal,
		Reason:     reason,
		ScheduleAt: time.Now(),
		Parameters: details,
	}

	glog.V(1).Infof("🔄 Found balance task: %s", reason)
	return []*types.TaskDetectionResult{found}, nil
}
// ScanInterval reports how often the detector wants to be scanned, which is
// its minimum check interval.
func (d *BalanceDetector) ScanInterval() time.Duration {
	return d.minCheckInterval
}
// IsEnabled reports whether the detector is currently switched on.
func (d *BalanceDetector) IsEnabled() bool {
	return d.enabled
}
// SetEnabled switches the detector on or off and logs the new state.
func (d *BalanceDetector) SetEnabled(enabled bool) {
	d.enabled = enabled

	glog.V(1).Infof("🔄 Balance detector enabled: %v", enabled)
}
// SetThreshold stores a new imbalance threshold (a fraction, logged as a
// percentage). The value is not range-checked here.
func (d *BalanceDetector) SetThreshold(threshold float64) {
	d.threshold = threshold

	glog.V(1).Infof("🔄 Balance threshold set to: %.1f%%", threshold*100)
}
// SetMinCheckInterval stores the minimum time that must pass between two
// balance scans and logs the new value.
func (d *BalanceDetector) SetMinCheckInterval(interval time.Duration) {
	d.minCheckInterval = interval

	glog.V(1).Infof("🔄 Balance check interval set to: %v", interval)
}
// SetMinVolumeCount stores the number of volumes below which ScanForTasks
// skips balancing, and logs the new value.
func (d *BalanceDetector) SetMinVolumeCount(count int) {
	d.minVolumeCount = count

	glog.V(1).Infof("🔄 Balance minimum volume count set to: %d", count)
}
// GetThreshold reports the imbalance threshold currently in effect.
func (d *BalanceDetector) GetThreshold() float64 {
	return d.threshold
}
// GetMinCheckInterval reports the minimum time between two balance scans.
func (d *BalanceDetector) GetMinCheckInterval() time.Duration {
	return d.minCheckInterval
}
// GetMinVolumeCount reports the volume count below which balancing is skipped.
func (d *BalanceDetector) GetMinVolumeCount() int {
	return d.minVolumeCount
}

View File

@@ -0,0 +1,81 @@
package balance
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Factory creates balance task instances.
// It embeds *tasks.BaseTaskFactory, which NewFactory populates with the task
// type, capability tags and description.
type Factory struct {
*tasks.BaseTaskFactory
}
// NewFactory builds the balance task factory, registering its task type,
// capability tags and human-readable description with the base factory.
func NewFactory() *Factory {
	tags := []string{"balance", "storage", "optimization"}
	base := tasks.NewBaseTaskFactory(
		types.TaskTypeBalance,
		tags,
		"Balance data across volume servers for optimal performance",
	)
	return &Factory{BaseTaskFactory: base}
}
// Create validates params (non-zero VolumeID, non-empty Server) and returns a
// new balance task with its estimated duration already set. Invalid params
// produce a nil task and an error.
func (f *Factory) Create(params types.TaskParams) (types.TaskInterface, error) {
	switch {
	case params.VolumeID == 0:
		return nil, fmt.Errorf("volume_id is required")
	case params.Server == "":
		return nil, fmt.Errorf("server is required")
	}

	created := NewTask(params.Server, params.VolumeID, params.Collection)
	estimate := created.EstimateTime(params)
	created.SetEstimatedDuration(estimate)
	return created, nil
}
// Package-level detector and scheduler shared by every registration made in
// this package; populated lazily by getSharedInstances.
var (
	sharedDetector  *BalanceDetector
	sharedScheduler *BalanceScheduler
)
// getSharedInstances returns the package's shared detector and scheduler,
// creating whichever of them does not exist yet. The lazy creation itself is
// not synchronized.
func getSharedInstances() (*BalanceDetector, *BalanceScheduler) {
	det := sharedDetector
	if det == nil {
		det = NewBalanceDetector()
		sharedDetector = det
	}

	sched := sharedScheduler
	if sched == nil {
		sched = NewBalanceScheduler()
		sharedScheduler = sched
	}

	return det, sched
}
// GetSharedInstances is the exported entry point to the shared detector and
// scheduler; it simply forwards to getSharedInstances.
func GetSharedInstances() (*BalanceDetector, *BalanceScheduler) {
	det, sched := getSharedInstances()
	return det, sched
}
// Auto-register this task when the package is imported
func init() {
factory := NewFactory()
tasks.AutoRegister(types.TaskTypeBalance, factory)
// Get shared instances for all registrations
detector, scheduler := getSharedInstances()
// Register with types registry
tasks.AutoRegisterTypes(func(registry *types.TaskRegistry) {
registry.RegisterTask(detector, scheduler)
})
// Register with UI registry using the same instances
tasks.AutoRegisterUI(func(uiRegistry *types.UIRegistry) {
RegisterUI(uiRegistry, detector, scheduler)
})
}

View File

@@ -0,0 +1,197 @@
package balance
import (
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// BalanceScheduler implements TaskScheduler for balance tasks.
// It holds the scheduling limits consulted by CanScheduleNow plus several
// settings that, in this file, are only stored and exposed through accessors.
type BalanceScheduler struct {
// enabled gates CanScheduleNow; when false nothing is schedulable.
enabled bool
// maxConcurrent caps the number of balance tasks running at once.
maxConcurrent int
// minInterval is the minimum time between two successful scheduling decisions.
minInterval time.Duration
lastScheduled map[string]time.Time // track when we last scheduled a balance for each task type
// minServerCount is stored and exposed via accessors; CanScheduleNow does not read it.
minServerCount int
// moveDuringOffHours is stored and exposed via accessors; CanScheduleNow does not read it.
moveDuringOffHours bool
// offHoursStart is kept as a string (default "23:00"); it is not parsed in this file.
offHoursStart string
// offHoursEnd is kept as a string (default "06:00"); it is not parsed in this file.
offHoursEnd string
}
// Compile-time check that *BalanceScheduler satisfies types.TaskScheduler.
var _ types.TaskScheduler = (*BalanceScheduler)(nil)
// NewBalanceScheduler returns an enabled scheduler with the default settings:
// one balance task at a time, six hours between runs, at least three servers,
// and an off-hours window of 23:00-06:00.
func NewBalanceScheduler() *BalanceScheduler {
	s := new(BalanceScheduler)
	s.enabled = true
	s.maxConcurrent = 1 // only run one balance at a time
	s.minInterval = 6 * time.Hour
	s.lastScheduled = map[string]time.Time{}
	s.minServerCount = 3
	s.moveDuringOffHours = true
	s.offHoursStart = "23:00"
	s.offHoursEnd = "06:00"
	return s
}
// GetTaskType identifies this scheduler as handling types.TaskTypeBalance.
func (s *BalanceScheduler) GetTaskType() types.TaskType {
	return types.TaskTypeBalance
}
// CanScheduleNow reports whether a balance task may be started right now.
//
// It refuses when the scheduler is disabled, when the number of running
// balance tasks has reached maxConcurrent, when the last approval was less
// than minInterval ago, or when no available worker lists the balance
// capability. Note that a positive answer has a side effect: it records the
// current time under the "balance" key of lastScheduled. The task argument is
// not consulted.
func (s *BalanceScheduler) CanScheduleNow(task *types.Task, runningTasks []*types.Task, availableWorkers []*types.Worker) bool {
	if !s.enabled {
		return false
	}

	// Concurrency limit.
	inFlight := 0
	for _, rt := range runningTasks {
		if rt.Type == types.TaskTypeBalance {
			inFlight++
		}
	}
	if inFlight >= s.maxConcurrent {
		glog.V(3).Infof("⏸️ Balance task blocked: too many running (%d >= %d)", inFlight, s.maxConcurrent)
		return false
	}

	// Minimum spacing between balance operations.
	if last, seen := s.lastScheduled["balance"]; seen && time.Since(last) < s.minInterval {
		remaining := s.minInterval - time.Since(last)
		glog.V(3).Infof("⏸️ Balance task blocked: too soon (wait %v)", remaining)
		return false
	}

	// At least one available worker must be able to run balance tasks.
	capable := 0
	for _, w := range availableWorkers {
		for _, c := range w.Capabilities {
			if c == types.TaskTypeBalance {
				capable++
				break
			}
		}
	}
	if capable == 0 {
		glog.V(3).Infof("⏸️ Balance task blocked: no available workers")
		return false
	}

	// Approved: remember when, so the next call honours minInterval.
	s.lastScheduled["balance"] = time.Now()
	glog.V(2).Infof("✅ Balance task can be scheduled (running: %d/%d, workers: %d)",
		inFlight, s.maxConcurrent, capable)
	return true
}
// GetPriority returns the priority assigned to balance tasks, which is always
// TaskPriorityNormal regardless of the task passed in.
func (s *BalanceScheduler) GetPriority(task *types.Task) types.TaskPriority {
	// Balancing is an optimization, not an emergency.
	return types.TaskPriorityNormal
}
// GetMaxConcurrent reports how many balance tasks may run at the same time.
func (s *BalanceScheduler) GetMaxConcurrent() int {
	return s.maxConcurrent
}
// GetDefaultRepeatInterval reports how long to wait before repeating a balance
// task; it is the scheduler's minimum interval.
func (s *BalanceScheduler) GetDefaultRepeatInterval() time.Duration {
	return s.minInterval
}
// IsEnabled reports whether the scheduler is currently switched on.
func (s *BalanceScheduler) IsEnabled() bool {
	return s.enabled
}
// SetEnabled switches the scheduler on or off and logs the new state.
func (s *BalanceScheduler) SetEnabled(enabled bool) {
	s.enabled = enabled

	glog.V(1).Infof("🔄 Balance scheduler enabled: %v", enabled)
}
// SetMaxConcurrent stores the maximum number of simultaneously running balance
// tasks and logs the new value. The value is not range-checked here.
func (s *BalanceScheduler) SetMaxConcurrent(max int) {
	s.maxConcurrent = max

	glog.V(1).Infof("🔄 Balance max concurrent set to: %d", max)
}
// SetMinInterval stores the minimum time between two balance operations and
// logs the new value.
func (s *BalanceScheduler) SetMinInterval(interval time.Duration) {
	s.minInterval = interval

	glog.V(1).Infof("🔄 Balance minimum interval set to: %v", interval)
}
// GetLastScheduled returns the time recorded for taskKey, or the zero
// time.Time when nothing has been recorded under that key.
func (s *BalanceScheduler) GetLastScheduled(taskKey string) time.Time {
	// A missing key yields the map's zero value, i.e. time.Time{}.
	return s.lastScheduled[taskKey]
}
// SetLastScheduled records when a task identified by taskKey was last
// scheduled, replacing any earlier value.
func (s *BalanceScheduler) SetLastScheduled(taskKey string, when time.Time) {
	s.lastScheduled[taskKey] = when
}
// GetMinServerCount reports the configured minimum number of servers.
func (s *BalanceScheduler) GetMinServerCount() int {
	return s.minServerCount
}
// SetMinServerCount stores the minimum number of servers and logs the new
// value.
func (s *BalanceScheduler) SetMinServerCount(count int) {
	s.minServerCount = count

	glog.V(1).Infof("🔄 Balance minimum server count set to: %d", count)
}
// GetMoveDuringOffHours reports whether the off-hours restriction flag is set.
func (s *BalanceScheduler) GetMoveDuringOffHours() bool {
	return s.moveDuringOffHours
}
// SetMoveDuringOffHours sets or clears the off-hours restriction flag and logs
// the new value.
func (s *BalanceScheduler) SetMoveDuringOffHours(enabled bool) {
	s.moveDuringOffHours = enabled

	glog.V(1).Infof("🔄 Balance move during off-hours: %v", enabled)
}
// GetOffHoursStart reports the configured start of the off-hours window as the
// stored string.
func (s *BalanceScheduler) GetOffHoursStart() string {
	return s.offHoursStart
}
// SetOffHoursStart stores the start of the off-hours window verbatim (no
// format validation) and logs the new value.
func (s *BalanceScheduler) SetOffHoursStart(start string) {
	s.offHoursStart = start

	glog.V(1).Infof("🔄 Balance off-hours start time set to: %s", start)
}
// GetOffHoursEnd reports the configured end of the off-hours window as the
// stored string.
func (s *BalanceScheduler) GetOffHoursEnd() string {
	return s.offHoursEnd
}
// SetOffHoursEnd stores the end of the off-hours window verbatim (no format
// validation) and logs the new value.
func (s *BalanceScheduler) SetOffHoursEnd(end string) {
	s.offHoursEnd = end

	glog.V(1).Infof("🔄 Balance off-hours end time set to: %s", end)
}
// GetMinInterval reports the minimum time between two balance operations.
func (s *BalanceScheduler) GetMinInterval() time.Duration {
	return s.minInterval
}

View File

@@ -0,0 +1,361 @@
package balance
import (
"fmt"
"html/template"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// UIProvider provides the UI for balance task configuration.
// It reads settings from, and applies settings to, the detector and scheduler
// it was constructed with; either may be nil, in which case it is skipped.
type UIProvider struct {
// detector supplies and receives the detection settings (enabled, threshold, scan interval).
detector *BalanceDetector
// scheduler supplies and receives the scheduling settings (concurrency, server count, off-hours).
scheduler *BalanceScheduler
}
// NewUIProvider returns a UI provider bound to the given detector and
// scheduler.
func NewUIProvider(detector *BalanceDetector, scheduler *BalanceScheduler) *UIProvider {
	provider := new(UIProvider)
	provider.detector = detector
	provider.scheduler = scheduler
	return provider
}
// GetTaskType identifies this UI provider as belonging to
// types.TaskTypeBalance.
func (ui *UIProvider) GetTaskType() types.TaskType {
	return types.TaskTypeBalance
}
// GetDisplayName returns the label shown to users for this task type.
func (ui *UIProvider) GetDisplayName() string {
	const displayName = "Volume Balance"
	return displayName
}
// GetDescription returns a one-sentence explanation of the balance task.
func (ui *UIProvider) GetDescription() string {
	const description = "Redistributes volumes across volume servers to optimize storage utilization and performance"
	return description
}
// GetIcon returns the CSS class string used to render this task type's icon.
func (ui *UIProvider) GetIcon() string {
	const iconClass = "fas fa-balance-scale text-secondary"
	return iconClass
}
// BalanceConfig represents the balance configuration.
// It is the flat, JSON-serializable view of the detector and scheduler
// settings; durations are stored as whole seconds.
type BalanceConfig struct {
// Enabled is applied to both the detector and the scheduler by ApplyConfig.
Enabled bool `json:"enabled"`
// ImbalanceThreshold is a fraction; ParseConfigForm accepts 0.0 through 1.0.
ImbalanceThreshold float64 `json:"imbalance_threshold"`
// ScanIntervalSeconds is the detector's check interval in seconds.
ScanIntervalSeconds int `json:"scan_interval_seconds"`
// MaxConcurrent is the scheduler's concurrency limit; ParseConfigForm requires at least 1.
MaxConcurrent int `json:"max_concurrent"`
// MinServerCount is the scheduler's minimum server count; ParseConfigForm requires at least 2.
MinServerCount int `json:"min_server_count"`
// MoveDuringOffHours is the scheduler's off-hours restriction flag.
MoveDuringOffHours bool `json:"move_during_off_hours"`
// OffHoursStart is stored as entered (e.g. "23:00"); its format is not validated here.
OffHoursStart string `json:"off_hours_start"`
// OffHoursEnd is stored as entered (e.g. "06:00"); its format is not validated here.
OffHoursEnd string `json:"off_hours_end"`
// MinIntervalSeconds is the minimum time between balance operations, in seconds.
MinIntervalSeconds int `json:"min_interval_seconds"`
}
// secondsToDuration converts a whole number of seconds into a time.Duration.
func secondsToDuration(seconds int) time.Duration {
	return time.Second * time.Duration(seconds)
}
// durationToSeconds converts a duration to whole seconds, discarding any
// fractional part (truncation toward zero).
func durationToSeconds(d time.Duration) int {
	secs := d.Seconds()
	return int(secs)
}
// formatDurationForUser renders a number of seconds in the largest fitting
// unit: whole seconds below a minute ("45s"), rounded minutes below an hour
// ("5m"), hours with one decimal below a day ("1.5h"), and days with one
// decimal otherwise ("2.0d").
func formatDurationForUser(seconds int) string {
	span := secondsToDuration(seconds)
	switch {
	case span < time.Minute:
		return fmt.Sprintf("%ds", seconds)
	case span < time.Hour:
		return fmt.Sprintf("%.0fm", span.Minutes())
	case span < 24*time.Hour:
		return fmt.Sprintf("%.1fh", span.Hours())
	default:
		return fmt.Sprintf("%.1fd", span.Hours()/24)
	}
}
// RenderConfigForm builds the HTML for the balance configuration page.
//
// The form is populated from the detector's and scheduler's live settings
// (via getCurrentBalanceConfig); the currentConfig argument is not consulted.
// The output is two Bootstrap cards: the generated form, followed by a static
// "Performance Considerations" notice that mentions the configured minimum
// server count. The returned error is always nil.
func (ui *UIProvider) RenderConfigForm(currentConfig interface{}) (template.HTML, error) {
	cfg := ui.getCurrentBalanceConfig()
	scanEvery := secondsToDuration(cfg.ScanIntervalSeconds)
	minGap := secondsToDuration(cfg.MinIntervalSeconds)

	fb := types.NewFormBuilder()

	// Detection settings.
	fb.AddCheckboxField("enabled", "Enable Balance Tasks",
		"Whether balance tasks should be automatically created", cfg.Enabled)
	fb.AddNumberField("imbalance_threshold", "Imbalance Threshold (%)",
		"Trigger balance when storage imbalance exceeds this percentage (0.0-1.0)",
		cfg.ImbalanceThreshold, true)
	fb.AddDurationField("scan_interval", "Scan Interval",
		"How often to scan for imbalanced volumes", scanEvery, true)

	// Scheduling settings.
	fb.AddNumberField("max_concurrent", "Max Concurrent Tasks",
		"Maximum number of balance tasks that can run simultaneously",
		float64(cfg.MaxConcurrent), true)
	fb.AddNumberField("min_server_count", "Minimum Server Count",
		"Only balance when at least this many servers are available",
		float64(cfg.MinServerCount), true)

	// Off-hours window.
	fb.AddCheckboxField("move_during_off_hours", "Restrict to Off-Hours",
		"Only perform balance operations during off-peak hours", cfg.MoveDuringOffHours)
	fb.AddTextField("off_hours_start", "Off-Hours Start Time",
		"Start time for off-hours window (e.g., 23:00)", cfg.OffHoursStart, false)
	fb.AddTextField("off_hours_end", "Off-Hours End Time",
		"End time for off-hours window (e.g., 06:00)", cfg.OffHoursEnd, false)

	// Spacing between balance runs.
	fb.AddDurationField("min_interval", "Min Interval",
		"Minimum time between balance operations", minGap, true)

	// Wrap the generated form and the static notice in Bootstrap cards.
	page := `
<div class="row">
<div class="col-12">
<div class="card mb-4">
<div class="card-header">
<h5 class="mb-0">
<i class="fas fa-balance-scale me-2"></i>
Balance Configuration
</h5>
</div>
<div class="card-body">
` + string(fb.Build()) + `
</div>
</div>
</div>
</div>
<div class="row">
<div class="col-12">
<div class="card mb-3">
<div class="card-header">
<h5 class="mb-0">
<i class="fas fa-exclamation-triangle me-2"></i>
Performance Considerations
</h5>
</div>
<div class="card-body">
<div class="alert alert-warning" role="alert">
<h6 class="alert-heading">Important Considerations:</h6>
<p class="mb-2"><strong>Performance:</strong> Volume balancing involves data movement and can impact cluster performance.</p>
<p class="mb-2"><strong>Recommendation:</strong> Enable off-hours restriction to minimize impact on production workloads.</p>
<p class="mb-0"><strong>Safety:</strong> Requires at least ` + strconv.Itoa(cfg.MinServerCount) + ` servers to ensure data safety during moves.</p>
</div>
</div>
</div>
</div>
</div>`

	return template.HTML(page), nil
}
// ParseConfigForm converts submitted form values into a *BalanceConfig.
//
// Checkboxes ("enabled", "move_during_off_hours") are true when the key has at
// least one value. Every other field is optional: when absent, the
// corresponding config field keeps its zero value. For fields that are
// present, only the first value is used and it is validated:
//   - imbalance_threshold: float in [0.0, 1.0]
//   - scan_interval, min_interval: Go duration strings (time.ParseDuration),
//     must not be negative, stored as whole seconds
//   - max_concurrent: integer >= 1
//   - min_server_count: integer >= 2
//   - off_hours_start, off_hours_end: stored verbatim
//
// On any validation failure it returns nil and an error; parse errors wrap the
// underlying cause so callers can inspect it with errors.Is / errors.As.
func (ui *UIProvider) ParseConfigForm(formData map[string][]string) (interface{}, error) {
	// first returns the first submitted value for key and whether one exists.
	first := func(key string) (string, bool) {
		if values, ok := formData[key]; ok && len(values) > 0 {
			return values[0], true
		}
		return "", false
	}

	// seconds parses an optional duration field into whole seconds. An absent
	// field yields 0, matching the zero value of the config field.
	seconds := func(key, label string) (int, error) {
		raw, ok := first(key)
		if !ok {
			return 0, nil
		}
		d, err := time.ParseDuration(raw)
		if err != nil {
			return 0, fmt.Errorf("invalid %s: %w", label, err)
		}
		if d < 0 {
			return 0, fmt.Errorf("%s must not be negative", label)
		}
		return int(d.Seconds()), nil
	}

	config := &BalanceConfig{
		Enabled:            len(formData["enabled"]) > 0,
		MoveDuringOffHours: len(formData["move_during_off_hours"]) > 0,
	}

	// Parse imbalance threshold
	if raw, ok := first("imbalance_threshold"); ok {
		threshold, err := strconv.ParseFloat(raw, 64)
		if err != nil {
			return nil, fmt.Errorf("invalid imbalance threshold: %w", err)
		}
		if threshold < 0 || threshold > 1 {
			return nil, fmt.Errorf("imbalance threshold must be between 0.0 and 1.0")
		}
		config.ImbalanceThreshold = threshold
	}

	// Parse scan interval
	scanSeconds, err := seconds("scan_interval", "scan interval")
	if err != nil {
		return nil, err
	}
	config.ScanIntervalSeconds = scanSeconds

	// Parse max concurrent
	if raw, ok := first("max_concurrent"); ok {
		maxConcurrent, err := strconv.Atoi(raw)
		if err != nil {
			return nil, fmt.Errorf("invalid max concurrent: %w", err)
		}
		if maxConcurrent < 1 {
			return nil, fmt.Errorf("max concurrent must be at least 1")
		}
		config.MaxConcurrent = maxConcurrent
	}

	// Parse min server count
	if raw, ok := first("min_server_count"); ok {
		minServerCount, err := strconv.Atoi(raw)
		if err != nil {
			return nil, fmt.Errorf("invalid min server count: %w", err)
		}
		if minServerCount < 2 {
			return nil, fmt.Errorf("min server count must be at least 2")
		}
		config.MinServerCount = minServerCount
	}

	// Parse off-hours window (stored as entered)
	if raw, ok := first("off_hours_start"); ok {
		config.OffHoursStart = raw
	}
	if raw, ok := first("off_hours_end"); ok {
		config.OffHoursEnd = raw
	}

	// Parse min interval
	minSeconds, err := seconds("min_interval", "min interval")
	if err != nil {
		return nil, err
	}
	config.MinIntervalSeconds = minSeconds

	return config, nil
}
// GetCurrentConfig returns the live configuration as a *BalanceConfig wrapped
// in an interface value.
func (ui *UIProvider) GetCurrentConfig() interface{} {
	current := ui.getCurrentBalanceConfig()
	return current
}
// ApplyConfig pushes a *BalanceConfig into the detector and scheduler.
//
// config must be a non-nil *BalanceConfig; anything else is rejected with an
// error. The detector receives the enabled flag, threshold and scan interval.
// The scheduler receives the enabled flag, concurrency limit, minimum interval
// between runs, minimum server count and the off-hours settings. A nil
// detector or scheduler is skipped.
func (ui *UIProvider) ApplyConfig(config interface{}) error {
	balanceConfig, ok := config.(*BalanceConfig)
	if !ok {
		return fmt.Errorf("invalid config type, expected *BalanceConfig")
	}
	// A typed nil pointer passes the assertion above but cannot be read.
	if balanceConfig == nil {
		return fmt.Errorf("invalid config: nil *BalanceConfig")
	}

	// Apply to detector
	if ui.detector != nil {
		ui.detector.SetEnabled(balanceConfig.Enabled)
		ui.detector.SetThreshold(balanceConfig.ImbalanceThreshold)
		ui.detector.SetMinCheckInterval(secondsToDuration(balanceConfig.ScanIntervalSeconds))
	}

	// Apply to scheduler
	if ui.scheduler != nil {
		ui.scheduler.SetEnabled(balanceConfig.Enabled)
		ui.scheduler.SetMaxConcurrent(balanceConfig.MaxConcurrent)
		// The form exposes min_interval, so it must reach the scheduler too.
		ui.scheduler.SetMinInterval(secondsToDuration(balanceConfig.MinIntervalSeconds))
		ui.scheduler.SetMinServerCount(balanceConfig.MinServerCount)
		ui.scheduler.SetMoveDuringOffHours(balanceConfig.MoveDuringOffHours)
		ui.scheduler.SetOffHoursStart(balanceConfig.OffHoursStart)
		ui.scheduler.SetOffHoursEnd(balanceConfig.OffHoursEnd)
	}

	glog.V(1).Infof("Applied balance configuration: enabled=%v, threshold=%.1f%%, max_concurrent=%d, min_servers=%d, off_hours=%v",
		balanceConfig.Enabled, balanceConfig.ImbalanceThreshold*100, balanceConfig.MaxConcurrent,
		balanceConfig.MinServerCount, balanceConfig.MoveDuringOffHours)

	return nil
}
// getCurrentBalanceConfig assembles a BalanceConfig from the live detector and
// scheduler settings.
//
// It starts from built-in fallback values, then overwrites the detection
// fields from the detector and the scheduling fields (including the minimum
// interval between runs) from the scheduler. A nil detector or scheduler
// leaves the corresponding fallbacks in place.
func (ui *UIProvider) getCurrentBalanceConfig() *BalanceConfig {
	config := &BalanceConfig{
		// Default values (fallback if detectors/schedulers are nil)
		Enabled:             true,
		ImbalanceThreshold:  0.1, // 10% imbalance
		ScanIntervalSeconds: durationToSeconds(4 * time.Hour),
		MaxConcurrent:       1,
		MinServerCount:      3,
		MoveDuringOffHours:  true,
		OffHoursStart:       "23:00",
		OffHoursEnd:         "06:00",
		MinIntervalSeconds:  durationToSeconds(1 * time.Hour),
	}

	// Get current values from detector
	if ui.detector != nil {
		config.Enabled = ui.detector.IsEnabled()
		config.ImbalanceThreshold = ui.detector.GetThreshold()
		config.ScanIntervalSeconds = int(ui.detector.ScanInterval().Seconds())
	}

	// Get current values from scheduler
	if ui.scheduler != nil {
		config.MaxConcurrent = ui.scheduler.GetMaxConcurrent()
		config.MinServerCount = ui.scheduler.GetMinServerCount()
		config.MoveDuringOffHours = ui.scheduler.GetMoveDuringOffHours()
		config.OffHoursStart = ui.scheduler.GetOffHoursStart()
		config.OffHoursEnd = ui.scheduler.GetOffHoursEnd()
		// Report the scheduler's real interval rather than the fallback.
		config.MinIntervalSeconds = durationToSeconds(ui.scheduler.GetMinInterval())
	}

	return config
}
// RegisterUI creates a balance UI provider around the given detector and
// scheduler and adds it to uiRegistry.
func RegisterUI(uiRegistry *types.UIRegistry, detector *BalanceDetector, scheduler *BalanceScheduler) {
	uiRegistry.RegisterUI(NewUIProvider(detector, scheduler))

	glog.V(1).Infof("✅ Registered balance task UI provider")
}
// DefaultBalanceConfig returns a new BalanceConfig with conservative defaults:
// balancing disabled, a 30% threshold, a 4-hour scan interval, one concurrent
// task, at least three servers, no off-hours restriction (window 22:00-06:00),
// and one hour between runs.
func DefaultBalanceConfig() *BalanceConfig {
	cfg := new(BalanceConfig)
	cfg.Enabled = false
	cfg.ImbalanceThreshold = 0.3
	cfg.ScanIntervalSeconds = durationToSeconds(4 * time.Hour)
	cfg.MaxConcurrent = 1
	cfg.MinServerCount = 3
	cfg.MoveDuringOffHours = false
	cfg.OffHoursStart = "22:00"
	cfg.OffHoursEnd = "06:00"
	cfg.MinIntervalSeconds = durationToSeconds(1 * time.Hour)
	return cfg
}

View File

@@ -0,0 +1,369 @@
package balance
import (
"fmt"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/admin/view/components"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// formatDurationFromSeconds renders a number of seconds using
// time.Duration's standard string form (for example "1m30s").
func formatDurationFromSeconds(seconds int) string {
	return (time.Duration(seconds) * time.Second).String()
}
// secondsToValueAndUnit splits a number of seconds into a value and the
// largest unit that divides it evenly.
//
// Exact multiples of a day (at least one day) are reported in "days", exact
// multiples of an hour (at least one hour) in "hours", and everything else in
// "minutes". The minutes value is fractional when seconds is not a multiple of
// 60 (e.g. 90 -> 1.5), so sub-minute precision is not silently dropped and the
// result round-trips through valueAndUnitToSeconds. Zero is reported as
// (0, "minutes").
func secondsToValueAndUnit(seconds int) (float64, string) {
	const (
		secondsPerMinute = 60
		secondsPerHour   = 60 * secondsPerMinute
		secondsPerDay    = 24 * secondsPerHour
	)

	switch {
	case seconds == 0:
		return 0, "minutes"
	case seconds >= secondsPerDay && seconds%secondsPerDay == 0:
		return float64(seconds / secondsPerDay), "days"
	case seconds >= secondsPerHour && seconds%secondsPerHour == 0:
		return float64(seconds / secondsPerHour), "hours"
	default:
		// Divide in floating point; integer division would turn 90s into 1m.
		return float64(seconds) / secondsPerMinute, "minutes"
	}
}
// valueAndUnitToSeconds converts a value expressed in "days", "hours" or
// "minutes" into whole seconds, truncating any fraction of a second. Any
// unrecognized unit is treated as minutes.
func valueAndUnitToSeconds(value float64, unit string) int {
	if unit == "days" {
		return int(value * 24 * 3600)
	}
	if unit == "hours" {
		return int(value * 3600)
	}
	// "minutes" and every unknown unit share the same conversion.
	return int(value * 60)
}
// UITemplProvider provides the templ-based UI for balance task configuration.
// It is constructed around a detector and a scheduler by NewUITemplProvider.
type UITemplProvider struct {
// detector is the balance detector passed to NewUITemplProvider.
detector *BalanceDetector
// scheduler is the balance scheduler passed to NewUITemplProvider.
scheduler *BalanceScheduler
}
// NewUITemplProvider returns a templ UI provider bound to the given detector
// and scheduler.
func NewUITemplProvider(detector *BalanceDetector, scheduler *BalanceScheduler) *UITemplProvider {
	provider := new(UITemplProvider)
	provider.detector = detector
	provider.scheduler = scheduler
	return provider
}
// GetTaskType identifies this UI provider as belonging to
// types.TaskTypeBalance.
func (ui *UITemplProvider) GetTaskType() types.TaskType {
	return types.TaskTypeBalance
}
// GetDisplayName returns the label shown to users for this task type.
func (ui *UITemplProvider) GetDisplayName() string {
	const displayName = "Volume Balance"
	return displayName
}
// GetDescription returns a one-sentence explanation of the balance task.
func (ui *UITemplProvider) GetDescription() string {
	const description = "Redistributes volumes across volume servers to optimize storage utilization and performance"
	return description
}
// GetIcon returns the CSS class string used to render this task type's icon.
func (ui *UITemplProvider) GetIcon() string {
	const iconClass = "fas fa-balance-scale text-secondary"
	return iconClass
}
// RenderConfigSections describes the balance configuration form as templ
// section data, in display order: detection, scheduling, timing constraints
// and a read-only performance notice.
//
// The currentConfig argument is not consulted; field values always come from
// getCurrentBalanceConfig. The returned error is always nil.
func (ui *UITemplProvider) RenderConfigSections(currentConfig interface{}) ([]components.ConfigSectionData, error) {
	cfg := ui.getCurrentBalanceConfig()

	// meta builds the metadata shared by every field kind.
	meta := func(name, label, description string, required bool) components.FormFieldData {
		return components.FormFieldData{
			Name:        name,
			Label:       label,
			Description: description,
			Required:    required,
		}
	}

	detection := components.ConfigSectionData{
		Title:       "Detection Settings",
		Icon:        "fas fa-search",
		Description: "Configure when balance tasks should be triggered",
		Fields: []interface{}{
			components.CheckboxFieldData{
				FormFieldData: meta("enabled", "Enable Balance Tasks",
					"Whether balance tasks should be automatically created", false),
				Checked: cfg.Enabled,
			},
			components.NumberFieldData{
				FormFieldData: meta("imbalance_threshold", "Imbalance Threshold",
					"Trigger balance when storage imbalance exceeds this percentage (0.0-1.0)", true),
				Value: cfg.ImbalanceThreshold,
				Step:  "0.01",
				Min:   floatPtr(0.0),
				Max:   floatPtr(1.0),
			},
			components.DurationInputFieldData{
				FormFieldData: meta("scan_interval", "Scan Interval",
					"How often to scan for imbalanced volumes", true),
				Seconds: cfg.ScanIntervalSeconds,
			},
		},
	}

	scheduling := components.ConfigSectionData{
		Title:       "Scheduling Settings",
		Icon:        "fas fa-clock",
		Description: "Configure task scheduling and concurrency",
		Fields: []interface{}{
			components.NumberFieldData{
				FormFieldData: meta("max_concurrent", "Max Concurrent Tasks",
					"Maximum number of balance tasks that can run simultaneously", true),
				Value: float64(cfg.MaxConcurrent),
				Step:  "1",
				Min:   floatPtr(1),
			},
			components.NumberFieldData{
				FormFieldData: meta("min_server_count", "Minimum Server Count",
					"Only balance when at least this many servers are available", true),
				Value: float64(cfg.MinServerCount),
				Step:  "1",
				Min:   floatPtr(1),
			},
		},
	}

	timing := components.ConfigSectionData{
		Title:       "Timing Constraints",
		Icon:        "fas fa-calendar-clock",
		Description: "Configure when balance operations are allowed",
		Fields: []interface{}{
			components.CheckboxFieldData{
				FormFieldData: meta("move_during_off_hours", "Restrict to Off-Hours",
					"Only perform balance operations during off-peak hours", false),
				Checked: cfg.MoveDuringOffHours,
			},
			components.TextFieldData{
				FormFieldData: meta("off_hours_start", "Off-Hours Start Time",
					"Start time for off-hours window (e.g., 23:00)", false),
				Value: cfg.OffHoursStart,
			},
			components.TextFieldData{
				FormFieldData: meta("off_hours_end", "Off-Hours End Time",
					"End time for off-hours window (e.g., 06:00)", false),
				Value: cfg.OffHoursEnd,
			},
		},
	}

	// Informational entries only: their values are fixed text, not settings.
	performance := components.ConfigSectionData{
		Title:       "Performance Considerations",
		Icon:        "fas fa-exclamation-triangle",
		Description: "Important information about balance operations",
		Fields: []interface{}{
			components.TextFieldData{
				FormFieldData: meta("performance_info", "Performance Impact",
					"Volume balancing involves data movement and can impact cluster performance", false),
				Value: "Enable off-hours restriction to minimize impact on production workloads",
			},
			components.TextFieldData{
				FormFieldData: meta("safety_info", "Safety Requirements",
					fmt.Sprintf("Requires at least %d servers to ensure data safety during moves", cfg.MinServerCount), false),
				Value: "Maintains data safety during volume moves between servers",
			},
		},
	}

	sections := []components.ConfigSectionData{detection, scheduling, timing, performance}
	return sections, nil
}
// ParseConfigForm converts submitted form values into a *BalanceConfig.
//
// Fields absent from formData keep their zero value. A checkbox counts as set
// only when its first value is "on". An error is returned when a numeric field
// cannot be parsed or is out of range:
//   - imbalance_threshold must be a number within [0.0, 1.0]
//   - scan_interval (read together with scan_interval_unit, default
//     "minutes") must be a non-negative duration that fits in a time.Duration
//   - max_concurrent and min_server_count must be integers of at least 1
//
// Parse failures wrap the underlying strconv error.
func (ui *UITemplProvider) ParseConfigForm(formData map[string][]string) (interface{}, error) {
	config := &BalanceConfig{}

	// Parse enabled checkbox
	config.Enabled = len(formData["enabled"]) > 0 && formData["enabled"][0] == "on"

	// Parse imbalance threshold
	if thresholdStr := formData["imbalance_threshold"]; len(thresholdStr) > 0 {
		threshold, err := strconv.ParseFloat(thresholdStr[0], 64)
		if err != nil {
			return nil, fmt.Errorf("invalid imbalance threshold: %w", err)
		}
		// Written as a negated range test on purpose: ParseFloat accepts
		// "NaN", and NaN fails every ordered comparison, so the form
		// `threshold < 0 || threshold > 1` would let it through.
		if !(threshold >= 0 && threshold <= 1) {
			return nil, fmt.Errorf("imbalance threshold must be between 0.0 and 1.0")
		}
		config.ImbalanceThreshold = threshold
	}

	// Parse scan interval
	if valueStr := formData["scan_interval"]; len(valueStr) > 0 {
		value, err := strconv.ParseFloat(valueStr[0], 64)
		if err != nil {
			return nil, fmt.Errorf("invalid scan interval value: %w", err)
		}
		unit := "minutes" // default
		if unitStr := formData["scan_interval_unit"]; len(unitStr) > 0 {
			unit = unitStr[0]
		}
		// Reject NaN, negative and oversized values before converting to int:
		// the float-to-int conversion is not well defined for them, and
		// ApplyConfig later multiplies the second count by time.Second.
		const maxSeconds = float64(1<<63-1) / float64(time.Second)
		total := value * float64(valueAndUnitToSeconds(1, unit))
		if !(total >= 0 && total <= maxSeconds) {
			return nil, fmt.Errorf("scan interval is out of range")
		}
		config.ScanIntervalSeconds = valueAndUnitToSeconds(value, unit)
	}

	// Parse max concurrent
	if concurrentStr := formData["max_concurrent"]; len(concurrentStr) > 0 {
		concurrent, err := strconv.Atoi(concurrentStr[0])
		if err != nil {
			return nil, fmt.Errorf("invalid max concurrent: %w", err)
		}
		if concurrent < 1 {
			return nil, fmt.Errorf("max concurrent must be at least 1")
		}
		config.MaxConcurrent = concurrent
	}

	// Parse min server count
	if serverCountStr := formData["min_server_count"]; len(serverCountStr) > 0 {
		serverCount, err := strconv.Atoi(serverCountStr[0])
		if err != nil {
			return nil, fmt.Errorf("invalid min server count: %w", err)
		}
		if serverCount < 1 {
			return nil, fmt.Errorf("min server count must be at least 1")
		}
		config.MinServerCount = serverCount
	}

	// Parse move during off hours
	config.MoveDuringOffHours = len(formData["move_during_off_hours"]) > 0 && formData["move_during_off_hours"][0] == "on"

	// Off-hours bounds are stored verbatim; their format is not checked here.
	if startStr := formData["off_hours_start"]; len(startStr) > 0 {
		config.OffHoursStart = startStr[0]
	}
	if endStr := formData["off_hours_end"]; len(endStr) > 0 {
		config.OffHoursEnd = endStr[0]
	}

	return config, nil
}
// GetCurrentConfig returns the current configuration as a *BalanceConfig,
// as assembled by getCurrentBalanceConfig.
func (ui *UITemplProvider) GetCurrentConfig() interface{} {
return ui.getCurrentBalanceConfig()
}
// ApplyConfig pushes a *BalanceConfig to the detector and scheduler.
//
// It returns an error when config is not a *BalanceConfig. A nil detector or
// scheduler is skipped. The enabled flag is applied to both components.
func (ui *UITemplProvider) ApplyConfig(config interface{}) error {
	cfg, isBalanceConfig := config.(*BalanceConfig)
	if !isBalanceConfig {
		return fmt.Errorf("invalid config type, expected *BalanceConfig")
	}

	// Detection side.
	if det := ui.detector; det != nil {
		det.SetEnabled(cfg.Enabled)
		det.SetThreshold(cfg.ImbalanceThreshold)
		scanEvery := time.Duration(cfg.ScanIntervalSeconds) * time.Second
		det.SetMinCheckInterval(scanEvery)
	}

	// Scheduling side.
	if sched := ui.scheduler; sched != nil {
		sched.SetEnabled(cfg.Enabled)
		sched.SetMaxConcurrent(cfg.MaxConcurrent)
		sched.SetMinServerCount(cfg.MinServerCount)
		sched.SetMoveDuringOffHours(cfg.MoveDuringOffHours)
		sched.SetOffHoursStart(cfg.OffHoursStart)
		sched.SetOffHoursEnd(cfg.OffHoursEnd)
	}

	// The threshold is stored as a fraction and logged as a percentage.
	thresholdPercent := cfg.ImbalanceThreshold * 100
	glog.V(1).Infof("Applied balance configuration: enabled=%v, threshold=%.1f%%, max_concurrent=%d, min_servers=%d, off_hours=%v",
		cfg.Enabled, thresholdPercent, cfg.MaxConcurrent,
		cfg.MinServerCount, cfg.MoveDuringOffHours)
	return nil
}
// getCurrentBalanceConfig assembles a BalanceConfig from the live detector
// and scheduler. Settings owned by a nil component keep the built-in
// fallback values set at the top of the function.
func (ui *UITemplProvider) getCurrentBalanceConfig() *BalanceConfig {
	cfg := new(BalanceConfig)

	// Fallbacks, used only for whichever component is missing.
	cfg.Enabled = true
	cfg.ImbalanceThreshold = 0.1 // 10% imbalance
	cfg.ScanIntervalSeconds = int((4 * time.Hour).Seconds())
	cfg.MaxConcurrent = 1
	cfg.MinServerCount = 3
	cfg.MoveDuringOffHours = true
	cfg.OffHoursStart = "23:00"
	cfg.OffHoursEnd = "06:00"

	if det := ui.detector; det != nil {
		cfg.Enabled = det.IsEnabled()
		cfg.ImbalanceThreshold = det.GetThreshold()
		cfg.ScanIntervalSeconds = int(det.ScanInterval().Seconds())
	}

	if sched := ui.scheduler; sched != nil {
		cfg.MaxConcurrent = sched.GetMaxConcurrent()
		cfg.MinServerCount = sched.GetMinServerCount()
		cfg.MoveDuringOffHours = sched.GetMoveDuringOffHours()
		cfg.OffHoursStart = sched.GetOffHoursStart()
		cfg.OffHoursEnd = sched.GetOffHoursEnd()
	}

	return cfg
}
// floatPtr returns a pointer to a fresh float64 holding f, for optional
// numeric fields that are expressed as *float64.
func floatPtr(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}
// RegisterUITempl creates a balance templ UI provider for the given detector
// and scheduler and registers it with uiRegistry.
func RegisterUITempl(uiRegistry *types.UITemplRegistry, detector *BalanceDetector, scheduler *BalanceScheduler) {
	uiRegistry.RegisterUI(NewUITemplProvider(detector, scheduler))

	glog.V(1).Infof("✅ Registered balance task templ UI provider")
}

View File

@@ -0,0 +1,79 @@
package erasure_coding
import (
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Task implements the erasure coding operation that converts a volume to EC
// format. It embeds *tasks.BaseTask and records its target.
type Task struct {
*tasks.BaseTask
// server is the server named in the task's log messages.
server string
// volumeID identifies the volume to convert.
volumeID uint32
}
// NewTask returns an erasure coding task for the given volume on the given
// server, backed by a fresh BaseTask of type TaskTypeErasureCoding.
func NewTask(server string, volumeID uint32) *Task {
	return &Task{
		BaseTask: tasks.NewBaseTask(types.TaskTypeErasureCoding),
		server:   server,
		volumeID: volumeID,
	}
}
// Execute runs the erasure coding task.
//
// The current implementation simulates the work: it walks a fixed list of
// phases, publishing each phase's progress and then sleeping for the phase's
// duration. Cancellation is checked before each phase and reported as an
// error. The params argument is not used.
func (t *Task) Execute(params types.TaskParams) error {
	glog.Infof("Starting erasure coding task for volume %d on server %s", t.volumeID, t.server)

	type phase struct {
		label   string
		pause   time.Duration
		percent float64
	}
	phases := [...]phase{
		{label: "Analyzing volume", pause: 2 * time.Second, percent: 15},
		{label: "Creating EC shards", pause: 5 * time.Second, percent: 50},
		{label: "Verifying shards", pause: 2 * time.Second, percent: 75},
		{label: "Finalizing EC volume", pause: 1 * time.Second, percent: 100},
	}

	for i := range phases {
		if t.IsCancelled() {
			return fmt.Errorf("erasure coding task cancelled")
		}

		current := phases[i]
		glog.V(1).Infof("Erasure coding task step: %s", current.label)
		t.SetProgress(current.percent)

		// Stand-in for the real work of this phase.
		time.Sleep(current.pause)
	}

	glog.Infof("Erasure coding task completed for volume %d on server %s", t.volumeID, t.server)
	return nil
}
// Validate checks that params names a target: a non-zero VolumeID and a
// non-empty Server. The volume ID is checked first.
func (t *Task) Validate(params types.TaskParams) error {
	switch {
	case params.VolumeID == 0:
		return fmt.Errorf("volume_id is required")
	case params.Server == "":
		return fmt.Errorf("server is required")
	}
	return nil
}
// EstimateTime returns the expected duration of the task. The estimate is a
// fixed 30 seconds; params is not consulted.
func (t *Task) EstimateTime(params types.TaskParams) time.Duration {
	// Flat estimate; volume size or other factors could refine it.
	const flatEstimate = 30 * time.Second
	return flatEstimate
}

View File

@@ -0,0 +1,139 @@
package erasure_coding
import (
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// EcDetector implements erasure coding task detection: it selects volumes
// that are old enough, full enough and read-only for conversion to EC.
type EcDetector struct {
// enabled gates ScanForTasks; when false no tasks are reported.
enabled bool
// volumeAgeHours is the minimum volume age, in hours, for a volume to qualify.
volumeAgeHours int
// fullnessRatio is the minimum fullness ratio for a volume to qualify.
fullnessRatio float64
// scanInterval is the value reported by ScanInterval.
scanInterval time.Duration
}
// Compile-time interface assertions: the build fails if *EcDetector stops
// satisfying types.TaskDetector.
var (
_ types.TaskDetector = (*EcDetector)(nil)
)
// NewEcDetector returns a detector with conservative defaults: disabled,
// volumes must be at least one week old and 90% full, and the scan interval
// is two hours.
func NewEcDetector() *EcDetector {
	const oneWeekInHours = 24 * 7

	detector := new(EcDetector) // enabled stays false: conservative default
	detector.volumeAgeHours = oneWeekInHours
	detector.fullnessRatio = 0.9 // 90% full
	detector.scanInterval = 2 * time.Hour
	return detector
}
// GetTaskType returns the task type this detector produces, which is always
// types.TaskTypeErasureCoding.
func (d *EcDetector) GetTaskType() types.TaskType {
return types.TaskTypeErasureCoding
}
// ScanForTasks reports the volumes that should be converted to erasure
// coding.
//
// A volume qualifies when it is not already an EC volume, is at least
// volumeAgeHours old, is at least fullnessRatio full, and is read-only. Each
// qualifying volume yields a low-priority detection result scheduled for the
// time of the scan. When the detector is disabled the result is (nil, nil).
// The clusterInfo argument is not used, and the returned error is always nil.
func (d *EcDetector) ScanForTasks(volumeMetrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterInfo) ([]*types.TaskDetectionResult, error) {
	if !d.enabled {
		return nil, nil
	}

	scanTime := time.Now()
	minAge := time.Duration(d.volumeAgeHours) * time.Hour

	var found []*types.TaskDetectionResult
	for _, m := range volumeMetrics {
		if m.IsECVolume {
			continue // already converted
		}

		oldAndFull := m.Age >= minAge && m.FullnessRatio >= d.fullnessRatio
		if !oldAndFull {
			continue
		}

		// Only read-only volumes are considered safe for EC conversion.
		if !m.IsReadOnly {
			continue
		}

		found = append(found, &types.TaskDetectionResult{
			TaskType:   types.TaskTypeErasureCoding,
			VolumeID:   m.VolumeID,
			Server:     m.Server,
			Collection: m.Collection,
			Priority:   types.TaskPriorityLow, // EC is not urgent
			Reason:     "Volume is old and full enough for EC conversion",
			Parameters: map[string]interface{}{
				"age_hours":      int(m.Age.Hours()),
				"fullness_ratio": m.FullnessRatio,
			},
			ScheduleAt: scanTime,
		})
	}

	glog.V(2).Infof("EC detector found %d tasks to schedule", len(found))
	return found, nil
}
// ScanInterval returns how often this task type should be scanned, i.e. the
// detector's configured scan interval.
func (d *EcDetector) ScanInterval() time.Duration {
return d.scanInterval
}
// IsEnabled reports whether erasure coding detection is enabled.
func (d *EcDetector) IsEnabled() bool {
return d.enabled
}
// SetEnabled turns erasure coding detection on or off.
func (d *EcDetector) SetEnabled(enabled bool) {
d.enabled = enabled
}
// SetVolumeAgeHours sets the minimum volume age, in hours, required for a
// volume to qualify. The value is stored without validation.
func (d *EcDetector) SetVolumeAgeHours(hours int) {
d.volumeAgeHours = hours
}
// SetFullnessRatio sets the minimum fullness ratio required for a volume to
// qualify. The value is stored without validation.
func (d *EcDetector) SetFullnessRatio(ratio float64) {
d.fullnessRatio = ratio
}
// SetScanInterval sets the interval reported by ScanInterval. The value is
// stored without validation.
func (d *EcDetector) SetScanInterval(interval time.Duration) {
d.scanInterval = interval
}
// GetVolumeAgeHours returns the current volume age threshold in hours.
func (d *EcDetector) GetVolumeAgeHours() int {
return d.volumeAgeHours
}
// GetFullnessRatio returns the current fullness ratio threshold.
func (d *EcDetector) GetFullnessRatio() float64 {
return d.fullnessRatio
}
// GetScanInterval returns the scan interval. It returns the same value as
// ScanInterval.
func (d *EcDetector) GetScanInterval() time.Duration {
return d.scanInterval
}
// ConfigureFromPolicy copies the EC settings (enabled flag, volume age in
// hours, fullness ratio) from a maintenance policy into the detector.
//
// The policy only needs to provide the three getters listed in ecPolicy. A
// value that does not provide them is ignored, apart from a verbose log line.
func (d *EcDetector) ConfigureFromPolicy(policy interface{}) {
	// ecPolicy is the subset of the maintenance policy this detector reads.
	type ecPolicy interface {
		GetECEnabled() bool
		GetECVolumeAgeHours() int
		GetECFullnessRatio() float64
	}

	source, supported := policy.(ecPolicy)
	if !supported {
		glog.V(1).Infof("Could not configure EC detector from policy: unsupported policy type")
		return
	}

	d.SetEnabled(source.GetECEnabled())
	d.SetVolumeAgeHours(source.GetECVolumeAgeHours())
	d.SetFullnessRatio(source.GetECFullnessRatio())
}

View File

@@ -0,0 +1,81 @@
package erasure_coding
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Factory creates erasure coding task instances. It embeds
// *tasks.BaseTaskFactory and adds the Create method.
type Factory struct {
*tasks.BaseTaskFactory
}
// NewFactory returns an erasure coding task factory whose embedded base
// factory is created for TaskTypeErasureCoding.
func NewFactory() *Factory {
	labels := []string{"erasure_coding", "storage", "durability"}
	base := tasks.NewBaseTaskFactory(
		types.TaskTypeErasureCoding,
		labels,
		"Convert volumes to erasure coded format for improved durability",
	)
	return &Factory{BaseTaskFactory: base}
}
// Create builds an erasure coding task for the volume and server named in
// params and sets its estimated duration from EstimateTime.
//
// It returns an error when params.VolumeID is zero or params.Server is empty;
// the volume ID is checked first.
func (f *Factory) Create(params types.TaskParams) (types.TaskInterface, error) {
	switch {
	case params.VolumeID == 0:
		return nil, fmt.Errorf("volume_id is required")
	case params.Server == "":
		return nil, fmt.Errorf("server is required")
	}

	ecTask := NewTask(params.Server, params.VolumeID)
	estimate := ecTask.EstimateTime(params)
	ecTask.SetEstimatedDuration(estimate)
	return ecTask, nil
}
// Shared detector and scheduler instances. They start nil and are created on
// first use by getSharedInstances, so every registration made in this package
// refers to the same two objects.
var (
sharedDetector *EcDetector
sharedScheduler *Scheduler
)
// getSharedInstances returns the package-wide detector and scheduler,
// creating whichever of them does not exist yet. No locking is performed.
func getSharedInstances() (detector *EcDetector, scheduler *Scheduler) {
	if detector = sharedDetector; detector == nil {
		detector = NewEcDetector()
		sharedDetector = detector
	}
	if scheduler = sharedScheduler; scheduler == nil {
		scheduler = NewScheduler()
		sharedScheduler = scheduler
	}
	return detector, scheduler
}
// GetSharedInstances is the exported form of getSharedInstances: it returns
// the package-wide detector and scheduler instances.
func GetSharedInstances() (*EcDetector, *Scheduler) {
return getSharedInstances()
}
// Auto-register this task when the package is imported
func init() {
factory := NewFactory()
tasks.AutoRegister(types.TaskTypeErasureCoding, factory)
// Get shared instances for all registrations
detector, scheduler := getSharedInstances()
// Register with types registry
tasks.AutoRegisterTypes(func(registry *types.TaskRegistry) {
registry.RegisterTask(detector, scheduler)
})
// Register with UI registry using the same instances
tasks.AutoRegisterUI(func(uiRegistry *types.UIRegistry) {
RegisterUI(uiRegistry, detector, scheduler)
})
}

View File

@@ -0,0 +1,114 @@
package erasure_coding
import (
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Scheduler implements erasure coding task scheduling.
type Scheduler struct {
// maxConcurrent is the number of running EC tasks at which CanScheduleNow
// starts refusing new ones.
maxConcurrent int
// enabled gates CanScheduleNow; when false nothing is scheduled.
enabled bool
}
// NewScheduler returns a scheduler with conservative defaults: disabled, and
// limited to one concurrent task.
func NewScheduler() *Scheduler {
	scheduler := new(Scheduler) // enabled stays false: conservative default
	scheduler.maxConcurrent = 1
	return scheduler
}
// GetTaskType returns the task type this scheduler handles, which is always
// types.TaskTypeErasureCoding.
func (s *Scheduler) GetTaskType() types.TaskType {
return types.TaskTypeErasureCoding
}
// CanScheduleNow reports whether an erasure coding task may start now.
//
// It returns true only when the scheduler is enabled, at least one worker is
// available, fewer than maxConcurrent EC tasks are running, and some
// available worker lists TaskTypeErasureCoding among its capabilities.
func (s *Scheduler) CanScheduleNow(task *types.Task, runningTasks []*types.Task, availableWorkers []*types.Worker) bool {
	if !s.enabled || len(availableWorkers) == 0 {
		return false
	}

	// Only EC tasks count against the EC concurrency limit.
	active := 0
	for _, running := range runningTasks {
		if running.Type == types.TaskTypeErasureCoding {
			active++
		}
	}
	if active >= s.maxConcurrent {
		glog.V(3).Infof("EC scheduler: at concurrency limit (%d/%d)", active, s.maxConcurrent)
		return false
	}

	// One capable worker is enough.
	for _, candidate := range availableWorkers {
		for _, capability := range candidate.Capabilities {
			if capability != types.TaskTypeErasureCoding {
				continue
			}
			glog.V(3).Infof("EC scheduler: can schedule task for volume %d", task.VolumeID)
			return true
		}
	}
	return false
}
// GetMaxConcurrent returns the maximum number of concurrent erasure coding
// tasks.
func (s *Scheduler) GetMaxConcurrent() int {
return s.maxConcurrent
}
// GetDefaultRepeatInterval returns the default interval to wait before
// repeating EC tasks: a fixed 24 hours.
func (s *Scheduler) GetDefaultRepeatInterval() time.Duration {
return 24 * time.Hour // Don't repeat EC for 24 hours
}
// GetPriority returns the priority for the given task. It is always
// types.TaskPriorityLow; the task argument is not consulted.
func (s *Scheduler) GetPriority(task *types.Task) types.TaskPriority {
return types.TaskPriorityLow // EC is not urgent
}
// WasTaskRecentlyCompleted reports whether an erasure coding task for the
// same volume and server completed within the 24 hours before now.
//
// Only entries with status TaskStatusCompleted and a non-nil CompletedAt
// strictly after the cutoff are counted.
func (s *Scheduler) WasTaskRecentlyCompleted(task *types.Task, completedTasks []*types.Task, now time.Time) bool {
	// Don't repeat EC for 24 hours
	const repeatWindow = 24 * time.Hour
	cutoff := now.Add(-repeatWindow)

	for _, done := range completedTasks {
		if done.Type != types.TaskTypeErasureCoding {
			continue
		}
		if done.VolumeID != task.VolumeID || done.Server != task.Server {
			continue
		}
		if done.Status != types.TaskStatusCompleted || done.CompletedAt == nil {
			continue
		}
		if done.CompletedAt.After(cutoff) {
			return true
		}
	}
	return false
}
// IsEnabled reports whether erasure coding scheduling is enabled.
func (s *Scheduler) IsEnabled() bool {
return s.enabled
}
// SetEnabled turns erasure coding scheduling on or off.
func (s *Scheduler) SetEnabled(enabled bool) {
s.enabled = enabled
}
// SetMaxConcurrent sets the maximum number of concurrent erasure coding
// tasks. The value is stored without validation.
func (s *Scheduler) SetMaxConcurrent(max int) {
s.maxConcurrent = max
}

View File

@@ -0,0 +1,309 @@
package erasure_coding
import (
"fmt"
"html/template"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// UIProvider provides the HTML-form UI for erasure coding task configuration.
// It reads settings from, and applies settings to, the detector and scheduler
// it was constructed with.
type UIProvider struct {
// detector receives the detection settings; the methods of this type
// tolerate a nil detector.
detector *EcDetector
// scheduler receives the scheduling settings; the methods of this type
// tolerate a nil scheduler.
scheduler *Scheduler
}
// NewUIProvider returns an erasure coding UI provider bound to the given
// detector and scheduler. Either argument may be nil.
func NewUIProvider(detector *EcDetector, scheduler *Scheduler) *UIProvider {
	provider := new(UIProvider)
	provider.detector = detector
	provider.scheduler = scheduler
	return provider
}
// GetTaskType returns the task type this provider configures, which is
// always types.TaskTypeErasureCoding.
func (ui *UIProvider) GetTaskType() types.TaskType {
return types.TaskTypeErasureCoding
}
// GetDisplayName returns the human-readable name of the erasure coding task.
func (ui *UIProvider) GetDisplayName() string {
return "Erasure Coding"
}
// GetDescription returns a one-sentence description of what the erasure
// coding task does.
func (ui *UIProvider) GetDescription() string {
return "Converts volumes to erasure coded format for improved data durability and fault tolerance"
}
// GetIcon returns the icon CSS class string for the erasure coding task type.
func (ui *UIProvider) GetIcon() string {
return "fas fa-shield-alt text-info"
}
// ErasureCodingConfig represents the erasure coding task configuration. It is
// the type produced by ParseConfigForm and GetCurrentConfig, and it carries
// JSON tags for serialisation.
type ErasureCodingConfig struct {
// Enabled toggles automatic creation of erasure coding tasks.
Enabled bool `json:"enabled"`
// VolumeAgeHoursSeconds is the minimum volume age for EC conversion.
// NOTE(review): the field name, JSON key and the fallback default
// (24 * 3600) indicate seconds, while EcDetector stores hours — confirm
// that every reader and writer converts between the two.
VolumeAgeHoursSeconds int `json:"volume_age_hours_seconds"`
// FullnessRatio is the minimum fullness ratio for EC conversion.
FullnessRatio float64 `json:"fullness_ratio"`
// ScanIntervalSeconds is the interval between scans, in seconds.
ScanIntervalSeconds int `json:"scan_interval_seconds"`
// MaxConcurrent is the maximum number of EC tasks running at once.
MaxConcurrent int `json:"max_concurrent"`
// ShardCount is the number of data shards.
ShardCount int `json:"shard_count"`
// ParityCount is the number of parity shards.
ParityCount int `json:"parity_count"`
// CollectionFilter presumably restricts EC to matching collections —
// TODO confirm; nothing in this file reads it.
CollectionFilter string `json:"collection_filter"`
}
// secondsToDuration converts a whole number of seconds into a time.Duration.
func secondsToDuration(seconds int) time.Duration {
	return time.Second * time.Duration(seconds)
}
// durationToSeconds converts a time.Duration into whole seconds, discarding
// the fractional part (truncation toward zero).
func durationToSeconds(d time.Duration) int {
	fractional := d.Seconds()
	return int(fractional)
}
// formatDurationForUser formats a number of seconds as a short,
// user-friendly duration using the largest fitting unit: "45s" below one
// minute, whole minutes ("12m") below one hour, hours with one decimal
// ("1.5h") below one day, and days with one decimal ("2.0d") otherwise.
func formatDurationForUser(seconds int) string {
	d := secondsToDuration(seconds)

	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", seconds)
	case d < time.Hour:
		return fmt.Sprintf("%.0fm", d.Minutes())
	case d < 24*time.Hour:
		return fmt.Sprintf("%.1fh", d.Hours())
	default:
		return fmt.Sprintf("%.1fd", d.Hours()/24)
	}
}
// RenderConfigForm renders the erasure coding configuration form as HTML.
//
// The form fields are produced by a types.FormBuilder and wrapped in a card,
// followed by a second card with performance and durability notes. The
// currentConfig argument is not consulted; values come from
// getCurrentECConfig. The returned error is always nil.
//
// FullnessRatio and CollectionFilter have no form field here.
func (ui *UIProvider) RenderConfigForm(currentConfig interface{}) (template.HTML, error) {
config := ui.getCurrentECConfig()
// Build form using the FormBuilder helper
form := types.NewFormBuilder()
// Detection Settings
form.AddCheckboxField(
"enabled",
"Enable Erasure Coding Tasks",
"Whether erasure coding tasks should be automatically created",
config.Enabled,
)
form.AddNumberField(
"volume_age_hours_seconds",
"Volume Age Threshold",
"Only apply erasure coding to volumes older than this duration",
float64(config.VolumeAgeHoursSeconds),
true,
)
form.AddNumberField(
"scan_interval_seconds",
"Scan Interval",
"How often to scan for volumes needing erasure coding",
float64(config.ScanIntervalSeconds),
true,
)
// Scheduling Settings
form.AddNumberField(
"max_concurrent",
"Max Concurrent Tasks",
"Maximum number of erasure coding tasks that can run simultaneously",
float64(config.MaxConcurrent),
true,
)
// Erasure Coding Parameters
form.AddNumberField(
"shard_count",
"Data Shards",
"Number of data shards for erasure coding (recommended: 10)",
float64(config.ShardCount),
true,
)
form.AddNumberField(
"parity_count",
"Parity Shards",
"Number of parity shards for erasure coding (recommended: 4)",
float64(config.ParityCount),
true,
)
// Generate organized form sections using Bootstrap components.
// Only the builder output and integer shard counts are spliced into the
// markup below; no free-form text is interpolated.
html := `
<div class="row">
<div class="col-12">
<div class="card mb-4">
<div class="card-header">
<h5 class="mb-0">
<i class="fas fa-shield-alt me-2"></i>
Erasure Coding Configuration
</h5>
</div>
<div class="card-body">
` + string(form.Build()) + `
</div>
</div>
</div>
</div>
<div class="row">
<div class="col-12">
<div class="card mb-3">
<div class="card-header">
<h5 class="mb-0">
<i class="fas fa-info-circle me-2"></i>
Performance Impact
</h5>
</div>
<div class="card-body">
<div class="alert alert-info" role="alert">
<h6 class="alert-heading">Important Notes:</h6>
<p class="mb-2"><strong>Performance:</strong> Erasure coding is CPU and I/O intensive. Consider running during off-peak hours.</p>
<p class="mb-0"><strong>Durability:</strong> With ` + fmt.Sprintf("%d+%d", config.ShardCount, config.ParityCount) + ` configuration, can tolerate up to ` + fmt.Sprintf("%d", config.ParityCount) + ` shard failures.</p>
</div>
</div>
</div>
</div>
</div>`
return template.HTML(html), nil
}
// ParseConfigForm converts submitted form values into an
// *ErasureCodingConfig.
//
// Enabled is true whenever the "enabled" key carries at least one value. The
// integer fields are read in a fixed order (volume age, scan interval, max
// concurrent, shard count, parity count); a field that is absent keeps its
// zero value. The first field that fails to parse, or that is below 1 where a
// minimum applies, aborts parsing with an error.
func (ui *UIProvider) ParseConfigForm(formData map[string][]string) (interface{}, error) {
	parsed := &ErasureCodingConfig{}
	parsed.Enabled = len(formData["enabled"]) > 0

	intFields := []struct {
		key      string
		invalid  string // message prefix when the value is not an integer
		tooSmall string // message when the value is below 1; empty = no minimum
		target   *int
	}{
		{"volume_age_hours_seconds", "invalid volume age hours", "", &parsed.VolumeAgeHoursSeconds},
		{"scan_interval_seconds", "invalid scan interval", "", &parsed.ScanIntervalSeconds},
		{"max_concurrent", "invalid max concurrent", "max concurrent must be at least 1", &parsed.MaxConcurrent},
		{"shard_count", "invalid shard count", "shard count must be at least 1", &parsed.ShardCount},
		{"parity_count", "invalid parity count", "parity count must be at least 1", &parsed.ParityCount},
	}

	for _, field := range intFields {
		raw := formData[field.key]
		if len(raw) == 0 {
			continue
		}
		number, err := strconv.Atoi(raw[0])
		if err != nil {
			return nil, fmt.Errorf("%s: %v", field.invalid, err)
		}
		if field.tooSmall != "" && number < 1 {
			return nil, fmt.Errorf("%s", field.tooSmall)
		}
		*field.target = number
	}

	return parsed, nil
}
// GetCurrentConfig returns the current configuration as an
// ErasureCodingConfig value (not a pointer), as assembled by
// getCurrentECConfig.
func (ui *UIProvider) GetCurrentConfig() interface{} {
return ui.getCurrentECConfig()
}
// ApplyConfig pushes an erasure coding configuration to the detector and
// scheduler.
//
// Both *ErasureCodingConfig (what ParseConfigForm returns) and
// ErasureCodingConfig (what GetCurrentConfig returns) are accepted; any other
// type, or a nil pointer, yields an error. A nil detector or scheduler is
// skipped.
//
// The configuration carries the volume age threshold in seconds while the
// detector stores whole hours, so the value is converted here; a remainder of
// less than one hour is dropped. FullnessRatio, ShardCount, ParityCount and
// CollectionFilter are not applied to any component.
func (ui *UIProvider) ApplyConfig(config interface{}) error {
	const secondsPerHour = int(time.Hour / time.Second)

	var ecConfig ErasureCodingConfig
	switch c := config.(type) {
	case ErasureCodingConfig:
		ecConfig = c
	case *ErasureCodingConfig:
		if c == nil {
			return fmt.Errorf("invalid config: nil *ErasureCodingConfig")
		}
		ecConfig = *c
	default:
		return fmt.Errorf("invalid config type, expected ErasureCodingConfig")
	}

	// Apply to detector
	if ui.detector != nil {
		ui.detector.SetEnabled(ecConfig.Enabled)
		ui.detector.SetVolumeAgeHours(ecConfig.VolumeAgeHoursSeconds / secondsPerHour)
		ui.detector.SetScanInterval(secondsToDuration(ecConfig.ScanIntervalSeconds))
	}

	// Apply to scheduler
	if ui.scheduler != nil {
		ui.scheduler.SetEnabled(ecConfig.Enabled)
		ui.scheduler.SetMaxConcurrent(ecConfig.MaxConcurrent)
	}

	glog.V(1).Infof("Applied erasure coding configuration: enabled=%v, age_threshold=%v, max_concurrent=%d, shards=%d+%d",
		ecConfig.Enabled, ecConfig.VolumeAgeHoursSeconds, ecConfig.MaxConcurrent, ecConfig.ShardCount, ecConfig.ParityCount)

	return nil
}

// getCurrentECConfig assembles an ErasureCodingConfig from the live detector
// and scheduler. Settings owned by a nil component keep the fallback values
// set in the literal below; ShardCount and ParityCount always report 10 and 4.
//
// The detector's hour-based age threshold is converted to seconds so that
// VolumeAgeHoursSeconds has the same unit whether or not a detector is
// attached, and so that ApplyConfig(getCurrentECConfig()) round-trips.
func (ui *UIProvider) getCurrentECConfig() ErasureCodingConfig {
	const secondsPerHour = int(time.Hour / time.Second)

	config := ErasureCodingConfig{
		// Default values (fallback if detectors/schedulers are nil)
		Enabled:               true,
		VolumeAgeHoursSeconds: 24 * secondsPerHour, // 24 hours in seconds
		ScanIntervalSeconds:   2 * secondsPerHour,  // 2 hours in seconds
		MaxConcurrent:         1,
		ShardCount:            10,
		ParityCount:           4,
	}

	// Get current values from detector
	if ui.detector != nil {
		config.Enabled = ui.detector.IsEnabled()
		config.VolumeAgeHoursSeconds = ui.detector.GetVolumeAgeHours() * secondsPerHour
		config.ScanIntervalSeconds = durationToSeconds(ui.detector.ScanInterval())
	}

	// Get current values from scheduler
	if ui.scheduler != nil {
		config.MaxConcurrent = ui.scheduler.GetMaxConcurrent()
	}

	return config
}
// RegisterUI creates an erasure coding UI provider for the given detector and
// scheduler and registers it with uiRegistry.
func RegisterUI(uiRegistry *types.UIRegistry, detector *EcDetector, scheduler *Scheduler) {
	uiRegistry.RegisterUI(NewUIProvider(detector, scheduler))

	glog.V(1).Infof("✅ Registered erasure coding task UI provider")
}

View File

@@ -0,0 +1,319 @@
package erasure_coding
import (
"fmt"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/admin/view/components"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// formatDurationFromSeconds renders a number of seconds in the notation of
// time.Duration.String, e.g. 90 -> "1m30s".
func formatDurationFromSeconds(seconds int) string {
	return (time.Duration(seconds) * time.Second).String()
}
// valueAndUnitToSeconds converts a (value, unit) pair from a duration input
// into whole seconds, truncating any fractional second. The recognised units
// are "days", "hours" and "minutes"; any other unit is treated as minutes.
func valueAndUnitToSeconds(value float64, unit string) int {
	// Minutes double as the fallback for unrecognised units.
	seconds := value * 60
	switch unit {
	case "days":
		seconds = value * 24 * 60 * 60
	case "hours":
		seconds = value * 60 * 60
	}
	return int(seconds)
}
// UITemplProvider provides the templ-based UI for erasure coding task
// configuration, backed by a detector and a scheduler.
type UITemplProvider struct {
// detector is the EC detector this provider was constructed with.
detector *EcDetector
// scheduler is the EC scheduler this provider was constructed with.
scheduler *Scheduler
}
// NewUITemplProvider returns an erasure coding templ UI provider bound to the
// given detector and scheduler.
func NewUITemplProvider(detector *EcDetector, scheduler *Scheduler) *UITemplProvider {
	provider := new(UITemplProvider)
	provider.detector = detector
	provider.scheduler = scheduler
	return provider
}
// ErasureCodingConfig is defined in ui.go - we reuse it
// GetTaskType returns the task type this provider configures, which is
// always types.TaskTypeErasureCoding.
func (ui *UITemplProvider) GetTaskType() types.TaskType {
return types.TaskTypeErasureCoding
}
// GetDisplayName returns the human-readable name of the erasure coding task.
func (ui *UITemplProvider) GetDisplayName() string {
return "Erasure Coding"
}
// GetDescription returns a one-sentence description of what the erasure
// coding task does.
func (ui *UITemplProvider) GetDescription() string {
return "Converts replicated volumes to erasure-coded format for efficient storage"
}
// GetIcon returns the icon CSS class string for the erasure coding task type.
func (ui *UITemplProvider) GetIcon() string {
return "fas fa-shield-alt text-info"
}
// RenderConfigSections describes the erasure coding configuration form as
// templ section data, in display order: detection settings, erasure coding
// parameters and a read-only performance notice.
//
// The currentConfig argument is not consulted; field values always come from
// getCurrentECConfig. The returned error is always nil.
func (ui *UITemplProvider) RenderConfigSections(currentConfig interface{}) ([]components.ConfigSectionData, error) {
	cfg := ui.getCurrentECConfig()

	// meta builds the metadata shared by every field kind.
	meta := func(name, label, description string, required bool) components.FormFieldData {
		return components.FormFieldData{
			Name:        name,
			Label:       label,
			Description: description,
			Required:    required,
		}
	}

	detection := components.ConfigSectionData{
		Title:       "Detection Settings",
		Icon:        "fas fa-search",
		Description: "Configure when erasure coding tasks should be triggered",
		Fields: []interface{}{
			components.CheckboxFieldData{
				FormFieldData: meta("enabled", "Enable Erasure Coding Tasks",
					"Whether erasure coding tasks should be automatically created", false),
				Checked: cfg.Enabled,
			},
			components.DurationInputFieldData{
				FormFieldData: meta("scan_interval", "Scan Interval",
					"How often to scan for volumes needing erasure coding", true),
				Seconds: cfg.ScanIntervalSeconds,
			},
			components.DurationInputFieldData{
				FormFieldData: meta("volume_age_threshold", "Volume Age Threshold",
					"Only apply erasure coding to volumes older than this age", true),
				Seconds: cfg.VolumeAgeHoursSeconds,
			},
		},
	}

	parameters := components.ConfigSectionData{
		Title:       "Erasure Coding Parameters",
		Icon:        "fas fa-cogs",
		Description: "Configure erasure coding scheme and performance",
		Fields: []interface{}{
			components.NumberFieldData{
				FormFieldData: meta("data_shards", "Data Shards",
					"Number of data shards in the erasure coding scheme", true),
				Value: float64(cfg.ShardCount),
				Step:  "1",
				Min:   floatPtr(1),
				Max:   floatPtr(16),
			},
			components.NumberFieldData{
				FormFieldData: meta("parity_shards", "Parity Shards",
					"Number of parity shards (determines fault tolerance)", true),
				Value: float64(cfg.ParityCount),
				Step:  "1",
				Min:   floatPtr(1),
				Max:   floatPtr(16),
			},
			components.NumberFieldData{
				FormFieldData: meta("max_concurrent", "Max Concurrent Tasks",
					"Maximum number of erasure coding tasks that can run simultaneously", true),
				Value: float64(cfg.MaxConcurrent),
				Step:  "1",
				Min:   floatPtr(1),
			},
		},
	}

	// Informational entries only: their values are fixed text, not settings.
	durabilityNote := fmt.Sprintf("With %d+%d configuration, can tolerate up to %d shard failures",
		cfg.ShardCount, cfg.ParityCount, cfg.ParityCount)
	info := components.ConfigSectionData{
		Title:       "Performance Impact",
		Icon:        "fas fa-info-circle",
		Description: "Important information about erasure coding operations",
		Fields: []interface{}{
			components.TextFieldData{
				FormFieldData: meta("durability_info", "Durability", durabilityNote, false),
				Value:         "High durability with space efficiency",
			},
			components.TextFieldData{
				FormFieldData: meta("performance_info", "Performance Note",
					"Erasure coding is CPU and I/O intensive. Consider running during off-peak hours", false),
				Value: "Schedule during low-traffic periods",
			},
		},
	}

	sections := []components.ConfigSectionData{detection, parameters, info}
	return sections, nil
}
// ParseConfigForm converts a submitted configuration form into an
// *ErasureCodingConfig.
//
// formData maps form field names to their submitted values; only the first
// value of each field is read. Fields that are absent keep the zero value of
// the corresponding config field. Durations arrive as a numeric "<name>" field
// plus an optional "<name>_unit" field, which defaults to "hours".
//
// It returns an error when a value cannot be parsed, when a duration is
// negative or NaN, when a shard count is outside 1-16, or when max_concurrent
// is below 1. Parse errors wrap the underlying strconv error.
func (ui *UITemplProvider) ParseConfigForm(formData map[string][]string) (interface{}, error) {
	config := &ErasureCodingConfig{}

	// The checkbox counts as enabled only when it was submitted with "on".
	config.Enabled = len(formData["enabled"]) > 0 && formData["enabled"][0] == "on"

	// parseDuration reads the value/unit pair for one duration field and
	// returns it in seconds; an absent field yields 0.
	parseDuration := func(name, label string) (int, error) {
		values := formData[name]
		if len(values) == 0 {
			return 0, nil
		}
		value, err := strconv.ParseFloat(values[0], 64)
		if err != nil {
			return 0, fmt.Errorf("invalid %s value: %w", label, err)
		}
		// ParseFloat accepts signed numbers and "NaN"; neither is a usable
		// duration. The negated comparison also catches NaN.
		if !(value >= 0) {
			return 0, fmt.Errorf("%s must not be negative", label)
		}
		unit := "hours" // default
		if units := formData[name+"_unit"]; len(units) > 0 {
			unit = units[0]
		}
		return valueAndUnitToSeconds(value, unit), nil
	}

	var err error
	if config.VolumeAgeHoursSeconds, err = parseDuration("volume_age_threshold", "volume age threshold"); err != nil {
		return nil, err
	}
	if config.ScanIntervalSeconds, err = parseDuration("scan_interval", "scan interval"); err != nil {
		return nil, err
	}

	// Data shards
	if raw := formData["data_shards"]; len(raw) > 0 {
		shards, err := strconv.Atoi(raw[0])
		if err != nil {
			return nil, fmt.Errorf("invalid data shards: %w", err)
		}
		if shards < 1 || shards > 16 {
			return nil, fmt.Errorf("data shards must be between 1 and 16")
		}
		config.ShardCount = shards
	}

	// Parity shards
	if raw := formData["parity_shards"]; len(raw) > 0 {
		shards, err := strconv.Atoi(raw[0])
		if err != nil {
			return nil, fmt.Errorf("invalid parity shards: %w", err)
		}
		if shards < 1 || shards > 16 {
			return nil, fmt.Errorf("parity shards must be between 1 and 16")
		}
		config.ParityCount = shards
	}

	// Max concurrent
	if raw := formData["max_concurrent"]; len(raw) > 0 {
		concurrent, err := strconv.Atoi(raw[0])
		if err != nil {
			return nil, fmt.Errorf("invalid max concurrent: %w", err)
		}
		if concurrent < 1 {
			return nil, fmt.Errorf("max concurrent must be at least 1")
		}
		config.MaxConcurrent = concurrent
	}

	return config, nil
}
// GetCurrentConfig exposes the result of getCurrentECConfig as an untyped
// value; the dynamic type is *ErasureCodingConfig.
func (ui *UITemplProvider) GetCurrentConfig() interface{} {
	current := ui.getCurrentECConfig()
	return current
}
// ApplyConfig pushes an *ErasureCodingConfig into the detector and scheduler.
//
// config must hold an *ErasureCodingConfig, otherwise an error is returned and
// nothing is changed. A nil detector or scheduler is skipped. ShardCount and
// ParityCount are not forwarded to either component here.
func (ui *UITemplProvider) ApplyConfig(config interface{}) error {
	cfg, isECConfig := config.(*ErasureCodingConfig)
	if !isECConfig {
		return fmt.Errorf("invalid config type, expected *ErasureCodingConfig")
	}

	// Detector settings.
	if detector := ui.detector; detector != nil {
		detector.SetEnabled(cfg.Enabled)
		// NOTE(review): a value in seconds is handed to a setter named
		// SetVolumeAgeHours - confirm the unit the detector expects.
		detector.SetVolumeAgeHours(cfg.VolumeAgeHoursSeconds)
		detector.SetScanInterval(time.Duration(cfg.ScanIntervalSeconds) * time.Second)
	}

	// Scheduler settings.
	if scheduler := ui.scheduler; scheduler != nil {
		scheduler.SetMaxConcurrent(cfg.MaxConcurrent)
		scheduler.SetEnabled(cfg.Enabled)
	}

	glog.V(1).Infof("Applied erasure coding configuration: enabled=%v, age_threshold=%ds, max_concurrent=%d",
		cfg.Enabled, cfg.VolumeAgeHoursSeconds, cfg.MaxConcurrent)
	return nil
}
// getCurrentECConfig assembles the configuration currently in effect.
//
// It starts from built-in defaults (enabled, 24h volume age, 2h scan interval,
// one concurrent task, 10 data + 4 parity shards) and overrides them with the
// values reported by the detector and scheduler when those are non-nil. The
// shard counts are never overridden here.
func (ui *UITemplProvider) getCurrentECConfig() *ErasureCodingConfig {
	cfg := new(ErasureCodingConfig)

	// Fallbacks used when the detector/scheduler are nil.
	cfg.Enabled = true
	cfg.VolumeAgeHoursSeconds = int((24 * time.Hour).Seconds())
	cfg.ScanIntervalSeconds = int((2 * time.Hour).Seconds())
	cfg.MaxConcurrent = 1
	cfg.ShardCount = 10
	cfg.ParityCount = 4

	if detector := ui.detector; detector != nil {
		cfg.Enabled = detector.IsEnabled()
		cfg.VolumeAgeHoursSeconds = detector.GetVolumeAgeHours()
		cfg.ScanIntervalSeconds = int(detector.ScanInterval().Seconds())
	}

	if scheduler := ui.scheduler; scheduler != nil {
		cfg.MaxConcurrent = scheduler.GetMaxConcurrent()
	}

	return cfg
}
// floatPtr returns a pointer to a fresh float64 holding f, for struct fields
// that take an optional *float64.
func floatPtr(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}
// RegisterUITempl builds a UITemplProvider around detector and scheduler and
// adds it to uiRegistry.
func RegisterUITempl(uiRegistry *types.UITemplRegistry, detector *EcDetector, scheduler *Scheduler) {
	uiRegistry.RegisterUI(NewUITemplProvider(detector, scheduler))

	glog.V(1).Infof("✅ Registered erasure coding task templ UI provider")
}

View File

@@ -0,0 +1,110 @@
package tasks
import (
"sync"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Package-level singleton registries. Each pointer is assigned inside the
// sync.Once of the same prefix by its GetGlobal* accessor.
var (
// globalRegistry holds task factories (see GetGlobalRegistry).
globalRegistry *TaskRegistry
// globalTypesRegistry is the registry from the types package (see GetGlobalTypesRegistry).
globalTypesRegistry *types.TaskRegistry
// globalUIRegistry holds task UI providers (see GetGlobalUIRegistry).
globalUIRegistry *types.UIRegistry
registryOnce sync.Once
typesRegistryOnce sync.Once
uiRegistryOnce sync.Once
)
// GetGlobalRegistry returns the shared task registry, creating it on the
// first call.
func GetGlobalRegistry() *TaskRegistry {
	create := func() {
		globalRegistry = NewTaskRegistry()
		glog.V(1).Infof("Created global task registry")
	}
	registryOnce.Do(create)

	return globalRegistry
}
// GetGlobalTypesRegistry returns the shared types.TaskRegistry, creating it on
// the first call.
func GetGlobalTypesRegistry() *types.TaskRegistry {
	create := func() {
		globalTypesRegistry = types.NewTaskRegistry()
		glog.V(1).Infof("Created global types registry")
	}
	typesRegistryOnce.Do(create)

	return globalTypesRegistry
}
// GetGlobalUIRegistry returns the shared UI registry, creating it on the first
// call.
func GetGlobalUIRegistry() *types.UIRegistry {
	create := func() {
		globalUIRegistry = types.NewUIRegistry()
		glog.V(1).Infof("Created global UI registry")
	}
	uiRegistryOnce.Do(create)

	return globalUIRegistry
}
// AutoRegister stores factory under taskType in the global task registry and
// logs the registration.
func AutoRegister(taskType types.TaskType, factory types.TaskFactory) {
	GetGlobalRegistry().Register(taskType, factory)

	glog.V(1).Infof("Auto-registered task type: %s", taskType)
}
// AutoRegisterTypes invokes registerFunc with the global types registry so the
// caller can register itself there.
func AutoRegisterTypes(registerFunc func(*types.TaskRegistry)) {
	registerFunc(GetGlobalTypesRegistry())

	glog.V(1).Infof("Auto-registered task with types registry")
}
// AutoRegisterUI invokes registerFunc with the global UI registry so the
// caller can register its UI provider there.
func AutoRegisterUI(registerFunc func(*types.UIRegistry)) {
	registerFunc(GetGlobalUIRegistry())

	glog.V(1).Infof("Auto-registered task UI provider")
}
// SetDefaultCapabilitiesFromRegistry collects every task type that has a
// detector in the global types registry and installs that list as the default
// worker capabilities via types.SetDefaultCapabilities.
//
// The list is built in iteration order of GetAllDetectors (presumably a map,
// in which case the order is not stable - confirm against the types package).
func SetDefaultCapabilitiesFromRegistry() {
	detectors := GetGlobalTypesRegistry().GetAllDetectors()

	var supported []types.TaskType
	for taskType := range detectors {
		supported = append(supported, taskType)
	}

	// Hand the collected types to the types package.
	types.SetDefaultCapabilities(supported)

	glog.V(1).Infof("Set default worker capabilities from registry: %v", supported)
}
// BuildMaintenancePolicyFromTasks returns a new maintenance policy populated
// with one task configuration per provider in the global UI registry. Each
// configuration is whatever the provider's GetCurrentConfig reports at the
// time of the call.
func BuildMaintenancePolicyFromTasks() *types.MaintenancePolicy {
	result := types.NewMaintenancePolicy()

	providers := GetGlobalUIRegistry().GetAllProviders()
	for taskType, uiProvider := range providers {
		// Record the provider's current configuration for this task type.
		result.SetTaskConfig(taskType, uiProvider.GetCurrentConfig())

		glog.V(3).Infof("Added default config for task type %s to policy", taskType)
	}

	glog.V(2).Infof("Built maintenance policy with %d task configurations", len(result.TaskConfigs))
	return result
}
// SetMaintenancePolicyFromTasks is currently a placeholder: it only logs that
// it was called and does not build or install any policy. The log message
// states that the integration layer is expected to build the policy.
func SetMaintenancePolicyFromTasks() {
	logger := glog.V(1)
	logger.Infof("SetMaintenancePolicyFromTasks called - policy should be built by the integration layer")
}

252
weed/worker/tasks/task.go Normal file
View File

@@ -0,0 +1,252 @@
package tasks
import (
"context"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// BaseTask provides common functionality for all tasks: the task type,
// progress tracking, a cancellation flag and timing information. Its accessor
// methods take the mutex, so a BaseTask should be used through a pointer.
type BaseTask struct {
// taskType is set by NewBaseTask and returned by Type.
taskType types.TaskType
// progress is kept within 0-100 by SetProgress.
progress float64
// cancelled is set to true by Cancel.
cancelled bool
// mutex guards progress, cancelled, startTime and estimatedDuration in the accessor methods.
mutex sync.RWMutex
// startTime is written by SetStartTime (ExecuteTask sets it to time.Now()).
startTime time.Time
// estimatedDuration is written by SetEstimatedDuration.
estimatedDuration time.Duration
}
// NewBaseTask returns a BaseTask of the given type with zero progress and the
// cancelled flag cleared.
func NewBaseTask(taskType types.TaskType) *BaseTask {
	// progress and cancelled rely on their zero values (0.0 and false).
	task := new(BaseTask)
	task.taskType = taskType
	return task
}
// Type reports the task type this BaseTask was created with. It reads the
// field without taking the mutex.
func (t *BaseTask) Type() types.TaskType {
	kind := t.taskType
	return kind
}
// GetProgress returns the last progress value stored by SetProgress, read
// under the read lock. SetProgress keeps it within 0.0-100.0.
func (t *BaseTask) GetProgress() float64 {
	t.mutex.RLock()
	current := t.progress
	t.mutex.RUnlock()
	return current
}
// SetProgress stores progress after clamping it: values below 0 become 0 and
// values above 100 become 100.
func (t *BaseTask) SetProgress(progress float64) {
	clamped := progress
	switch {
	case clamped < 0:
		clamped = 0
	case clamped > 100:
		clamped = 100
	}

	t.mutex.Lock()
	t.progress = clamped
	t.mutex.Unlock()
}
// Cancel marks the task as cancelled. It only sets the flag; ExecuteTask polls
// that flag to cancel the executor's context. The returned error is always nil.
func (t *BaseTask) Cancel() error {
	t.mutex.Lock()
	t.cancelled = true
	t.mutex.Unlock()
	return nil
}
// IsCancelled reports whether Cancel has been called, reading the flag under
// the read lock.
func (t *BaseTask) IsCancelled() bool {
	t.mutex.RLock()
	flag := t.cancelled
	t.mutex.RUnlock()
	return flag
}
// SetStartTime records startTime as the moment the task began, under the
// write lock.
func (t *BaseTask) SetStartTime(startTime time.Time) {
	t.mutex.Lock()
	t.startTime = startTime
	t.mutex.Unlock()
}
// GetStartTime returns the value last stored by SetStartTime (the zero
// time.Time if it was never set), read under the read lock.
func (t *BaseTask) GetStartTime() time.Time {
	t.mutex.RLock()
	started := t.startTime
	t.mutex.RUnlock()
	return started
}
// SetEstimatedDuration stores duration as the task's estimated run time,
// under the write lock.
func (t *BaseTask) SetEstimatedDuration(duration time.Duration) {
	t.mutex.Lock()
	t.estimatedDuration = duration
	t.mutex.Unlock()
}
// GetEstimatedDuration returns the value last stored by SetEstimatedDuration
// (zero if it was never set), read under the read lock.
func (t *BaseTask) GetEstimatedDuration() time.Duration {
	t.mutex.RLock()
	estimate := t.estimatedDuration
	t.mutex.RUnlock()
	return estimate
}
// ExecuteTask runs executor with the bookkeeping shared by all tasks.
//
// It records the start time, resets progress to 0, and calls executor with a
// context derived from ctx. That context is cancelled when ctx is done, when
// ExecuteTask returns, or when the task's cancelled flag is observed; the flag
// is polled about once per second by a background goroutine that exits as
// soon as the derived context is done.
//
// It returns executor's error if non-nil, context.Canceled if executor
// succeeded but the task was cancelled meanwhile, and otherwise sets progress
// to 100 and returns nil.
func (t *BaseTask) ExecuteTask(ctx context.Context, params types.TaskParams, executor func(context.Context, types.TaskParams) error) error {
	t.SetStartTime(time.Now())
	t.SetProgress(0)

	// Derive a context we can cancel when the task itself is cancelled.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Watch the cancelled flag. A single ticker is reused rather than calling
	// time.After on every iteration, which would allocate a timer per second.
	go func() {
		ticker := time.NewTicker(time.Second)
		defer ticker.Stop()

		for !t.IsCancelled() {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				// Re-check the cancelled flag.
			}
		}
		cancel()
	}()

	// Run the actual task.
	if err := executor(ctx, params); err != nil {
		return err
	}

	if t.IsCancelled() {
		return context.Canceled
	}

	t.SetProgress(100)
	return nil
}
// TaskRegistry manages task factories, keyed by task type.
type TaskRegistry struct {
// factories maps each registered task type to the factory that creates it.
factories map[types.TaskType]types.TaskFactory
// mutex guards factories.
mutex sync.RWMutex
}
// NewTaskRegistry returns an empty registry with its factory map initialised.
func NewTaskRegistry() *TaskRegistry {
	registry := new(TaskRegistry)
	registry.factories = make(map[types.TaskType]types.TaskFactory)
	return registry
}
// Register associates factory with taskType, replacing any factory previously
// stored under the same type.
func (r *TaskRegistry) Register(taskType types.TaskType, factory types.TaskFactory) {
	r.mutex.Lock()
	r.factories[taskType] = factory
	r.mutex.Unlock()
}
// CreateTask looks up the factory registered for taskType and asks it to
// create a task from params. It returns *UnsupportedTaskTypeError when no
// factory is registered. The registry lock is released before the factory is
// invoked.
func (r *TaskRegistry) CreateTask(taskType types.TaskType, params types.TaskParams) (types.TaskInterface, error) {
	lookup := func() (types.TaskFactory, bool) {
		r.mutex.RLock()
		defer r.mutex.RUnlock()
		found, ok := r.factories[taskType]
		return found, ok
	}

	if factory, ok := lookup(); ok {
		return factory.Create(params)
	}
	return nil, &UnsupportedTaskTypeError{TaskType: taskType}
}
// GetSupportedTypes returns the task types that currently have a registered
// factory. The slice is freshly allocated and follows map iteration order, so
// its ordering is not stable between calls.
func (r *TaskRegistry) GetSupportedTypes() []types.TaskType {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	supported := make([]types.TaskType, 0, len(r.factories))
	for registered := range r.factories {
		supported = append(supported, registered)
	}
	return supported
}
// GetFactory returns the factory registered for taskType. The boolean is
// false when nothing is registered under that type.
func (r *TaskRegistry) GetFactory(taskType types.TaskType) (types.TaskFactory, bool) {
	r.mutex.RLock()
	found, ok := r.factories[taskType]
	r.mutex.RUnlock()
	return found, ok
}
// UnsupportedTaskTypeError is returned by TaskRegistry.CreateTask when no
// factory is registered for the requested task type.
type UnsupportedTaskTypeError struct {
	TaskType types.TaskType
}

// Error implements the error interface.
func (e *UnsupportedTaskTypeError) Error() string {
	const prefix = "unsupported task type: "
	return prefix + string(e.TaskType)
}
// BaseTaskFactory provides common functionality for task factories: it stores
// the task type, the capability names and a description supplied to
// NewBaseTaskFactory.
type BaseTaskFactory struct {
taskType types.TaskType
// capabilities is returned as-is by Capabilities.
capabilities []string
// description is returned by Description.
description string
}
// NewBaseTaskFactory returns a BaseTaskFactory holding the given task type,
// capability list and description. The capabilities slice is stored without
// copying.
func NewBaseTaskFactory(taskType types.TaskType, capabilities []string, description string) *BaseTaskFactory {
	factory := new(BaseTaskFactory)
	factory.taskType = taskType
	factory.capabilities = capabilities
	factory.description = description
	return factory
}
// Capabilities returns the capability names this factory was created with.
// The returned slice is the factory's own slice, not a copy.
func (f *BaseTaskFactory) Capabilities() []string {
	required := f.capabilities
	return required
}
// Description returns the description text this factory was created with.
func (f *BaseTaskFactory) Description() string {
	text := f.description
	return text
}
// ValidateParams checks that each named required field is populated in
// params and returns a *ValidationError for the first one that is not.
//
// Recognised names are "volume_id" (VolumeID must be non-zero), "server" and
// "collection" (must be non-empty). Any other name is ignored.
func ValidateParams(params types.TaskParams, requiredFields ...string) error {
	for _, name := range requiredFields {
		var problem string
		switch {
		case name == "volume_id" && params.VolumeID == 0:
			problem = "volume_id is required"
		case name == "server" && params.Server == "":
			problem = "server is required"
		case name == "collection" && params.Collection == "":
			problem = "collection is required"
		}
		if problem != "" {
			return &ValidationError{Field: name, Message: problem}
		}
	}
	return nil
}
// ValidationError describes a task parameter that failed validation.
type ValidationError struct {
	Field   string // name of the offending parameter
	Message string // human-readable explanation
}

// Error implements the error interface, formatting as "<Field>: <Message>".
func (e *ValidationError) Error() string {
	text := e.Field
	text += ": "
	text += e.Message
	return text
}

View File

@@ -0,0 +1,314 @@
package vacuum
import (
"fmt"
"html/template"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// UIProvider provides the UI for vacuum task configuration. It reads from and
// writes to the detector and scheduler it was created with; either may be nil,
// in which case the methods that use it skip it.
type UIProvider struct {
detector *VacuumDetector
scheduler *VacuumScheduler
}
// NewUIProvider returns a UIProvider bound to the given detector and
// scheduler.
func NewUIProvider(detector *VacuumDetector, scheduler *VacuumScheduler) *UIProvider {
	provider := new(UIProvider)
	provider.detector = detector
	provider.scheduler = scheduler
	return provider
}
// GetTaskType identifies this provider as serving types.TaskTypeVacuum.
func (ui *UIProvider) GetTaskType() types.TaskType {
	return types.TaskTypeVacuum
}
// GetDisplayName returns the label shown to users for the vacuum task.
func (ui *UIProvider) GetDisplayName() string {
	const displayName = "Volume Vacuum"
	return displayName
}
// GetDescription returns a one-sentence summary of the vacuum task.
func (ui *UIProvider) GetDescription() string {
	const summary = "Reclaims disk space by removing deleted files from volumes"
	return summary
}
// GetIcon returns the CSS class string used to render the vacuum task icon.
func (ui *UIProvider) GetIcon() string {
	const iconClass = "fas fa-broom text-primary"
	return iconClass
}
// VacuumConfig represents the vacuum configuration. All durations are stored
// as whole seconds.
type VacuumConfig struct {
// Enabled is applied to both the detector and the scheduler by ApplyConfig.
Enabled bool `json:"enabled"`
// GarbageThreshold is a ratio; ParseConfigForm accepts 0.0 through 1.0.
GarbageThreshold float64 `json:"garbage_threshold"`
// ScanIntervalSeconds is how often to scan for volumes needing vacuum.
ScanIntervalSeconds int `json:"scan_interval_seconds"`
// MaxConcurrent is the number of vacuum tasks allowed at once; ParseConfigForm requires at least 1.
MaxConcurrent int `json:"max_concurrent"`
// MinVolumeAgeSeconds is the minimum age a volume must have to be vacuumed.
MinVolumeAgeSeconds int `json:"min_volume_age_seconds"`
// MinIntervalSeconds is the minimum time between vacuum operations on the same volume.
MinIntervalSeconds int `json:"min_interval_seconds"`
}
// Helper functions for duration conversion

// secondsToDuration converts a whole number of seconds to a time.Duration.
func secondsToDuration(seconds int) time.Duration {
	return time.Second * time.Duration(seconds)
}

// durationToSeconds converts d to whole seconds, discarding any fractional
// part (truncation toward zero).
func durationToSeconds(d time.Duration) int {
	secs := d.Seconds()
	return int(secs)
}
// formatDurationForUser renders a duration given in seconds using the largest
// fitting unit: whole seconds below one minute ("45s"), rounded minutes below
// one hour ("30m"), hours with one decimal below one day ("6.0h"), and days
// with one decimal otherwise ("1.5d").
func formatDurationForUser(seconds int) string {
	d := secondsToDuration(seconds)

	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", seconds)
	case d < time.Hour:
		return fmt.Sprintf("%.0fm", d.Minutes())
	case d < 24*time.Hour:
		return fmt.Sprintf("%.1fh", d.Hours())
	default:
		return fmt.Sprintf("%.1fd", d.Hours()/24)
	}
}
// RenderConfigForm renders the configuration form HTML.
//
// The currentConfig argument is not read; the form is populated from
// getCurrentVacuumConfig instead. All fields are added to a single FormBuilder
// and emitted inside one "Detection Settings" card, followed by an inline
// script defining resetForm(), which restores hard-coded default values in the
// browser. The returned error is always nil.
func (ui *UIProvider) RenderConfigForm(currentConfig interface{}) (template.HTML, error) {
config := ui.getCurrentVacuumConfig()
// Build form using the FormBuilder helper
form := types.NewFormBuilder()
// Detection Settings
form.AddCheckboxField(
"enabled",
"Enable Vacuum Tasks",
"Whether vacuum tasks should be automatically created",
config.Enabled,
)
form.AddNumberField(
"garbage_threshold",
"Garbage Threshold (%)",
"Trigger vacuum when garbage ratio exceeds this percentage (0.0-1.0)",
config.GarbageThreshold,
true,
)
form.AddDurationField(
"scan_interval",
"Scan Interval",
"How often to scan for volumes needing vacuum",
secondsToDuration(config.ScanIntervalSeconds),
true,
)
form.AddDurationField(
"min_volume_age",
"Minimum Volume Age",
"Only vacuum volumes older than this duration",
secondsToDuration(config.MinVolumeAgeSeconds),
true,
)
// Scheduling Settings
form.AddNumberField(
"max_concurrent",
"Max Concurrent Tasks",
"Maximum number of vacuum tasks that can run simultaneously",
float64(config.MaxConcurrent),
true,
)
form.AddDurationField(
"min_interval",
"Minimum Interval",
"Minimum time between vacuum operations on the same volume",
secondsToDuration(config.MinIntervalSeconds),
true,
)
// Generate organized form sections using Bootstrap components.
// The built form is spliced into the card body by string concatenation.
html := `
<div class="row">
<div class="col-12">
<div class="card mb-4">
<div class="card-header">
<h5 class="mb-0">
<i class="fas fa-search me-2"></i>
Detection Settings
</h5>
</div>
<div class="card-body">
` + string(form.Build()) + `
</div>
</div>
</div>
</div>
<script>
function resetForm() {
if (confirm('Reset all vacuum settings to defaults?')) {
// Reset to default values
document.querySelector('input[name="enabled"]').checked = true;
document.querySelector('input[name="garbage_threshold"]').value = '0.3';
document.querySelector('input[name="scan_interval"]').value = '30m';
document.querySelector('input[name="min_volume_age"]').value = '1h';
document.querySelector('input[name="max_concurrent"]').value = '2';
document.querySelector('input[name="min_interval"]').value = '6h';
}
}
</script>
`
return template.HTML(html), nil
}
// ParseConfigForm converts a submitted configuration form into a
// *VacuumConfig.
//
// formData maps form field names to their submitted values; only the first
// value of each field is read. Fields that are absent keep the zero value of
// the corresponding config field. Durations are parsed with
// time.ParseDuration (for example "30m" or "6h") and stored as whole seconds.
//
// It returns an error when a value cannot be parsed, when the garbage
// threshold is not within 0.0-1.0 (including NaN), when a duration is
// negative, or when max_concurrent is below 1. Parse errors wrap the
// underlying error.
func (ui *UIProvider) ParseConfigForm(formData map[string][]string) (interface{}, error) {
	config := &VacuumConfig{}

	// The checkbox counts as enabled only when it was submitted with "on".
	config.Enabled = len(formData["enabled"]) > 0 && formData["enabled"][0] == "on"

	// Garbage threshold
	if raw := formData["garbage_threshold"]; len(raw) > 0 {
		threshold, err := strconv.ParseFloat(raw[0], 64)
		if err != nil {
			return nil, fmt.Errorf("invalid garbage threshold: %w", err)
		}
		// Written as a negated range test so that NaN, which ParseFloat
		// accepts, is rejected too.
		if !(threshold >= 0 && threshold <= 1) {
			return nil, fmt.Errorf("garbage threshold must be between 0.0 and 1.0")
		}
		config.GarbageThreshold = threshold
	}

	// parseDuration reads one duration field and returns it in seconds; an
	// absent field yields 0.
	parseDuration := func(name, label string) (int, error) {
		raw := formData[name]
		if len(raw) == 0 {
			return 0, nil
		}
		d, err := time.ParseDuration(raw[0])
		if err != nil {
			return 0, fmt.Errorf("invalid %s: %w", label, err)
		}
		// ParseDuration accepts a leading minus sign.
		if d < 0 {
			return 0, fmt.Errorf("%s must not be negative", label)
		}
		return durationToSeconds(d), nil
	}

	var err error
	if config.ScanIntervalSeconds, err = parseDuration("scan_interval", "scan interval"); err != nil {
		return nil, err
	}
	if config.MinVolumeAgeSeconds, err = parseDuration("min_volume_age", "min volume age"); err != nil {
		return nil, err
	}

	// Max concurrent
	if raw := formData["max_concurrent"]; len(raw) > 0 {
		concurrent, err := strconv.Atoi(raw[0])
		if err != nil {
			return nil, fmt.Errorf("invalid max concurrent: %w", err)
		}
		if concurrent < 1 {
			return nil, fmt.Errorf("max concurrent must be at least 1")
		}
		config.MaxConcurrent = concurrent
	}

	if config.MinIntervalSeconds, err = parseDuration("min_interval", "min interval"); err != nil {
		return nil, err
	}

	return config, nil
}
// GetCurrentConfig exposes the result of getCurrentVacuumConfig as an untyped
// value; the dynamic type is *VacuumConfig.
func (ui *UIProvider) GetCurrentConfig() interface{} {
	current := ui.getCurrentVacuumConfig()
	return current
}
// ApplyConfig pushes a *VacuumConfig into the detector and scheduler.
//
// config must hold a *VacuumConfig, otherwise an error is returned and nothing
// is changed. A nil detector or scheduler is skipped.
func (ui *UIProvider) ApplyConfig(config interface{}) error {
	cfg, isVacuumConfig := config.(*VacuumConfig)
	if !isVacuumConfig {
		return fmt.Errorf("invalid config type, expected *VacuumConfig")
	}

	// Detector settings.
	if detector := ui.detector; detector != nil {
		detector.SetEnabled(cfg.Enabled)
		detector.SetGarbageThreshold(cfg.GarbageThreshold)
		detector.SetScanInterval(secondsToDuration(cfg.ScanIntervalSeconds))
		detector.SetMinVolumeAge(secondsToDuration(cfg.MinVolumeAgeSeconds))
	}

	// Scheduler settings.
	if scheduler := ui.scheduler; scheduler != nil {
		scheduler.SetEnabled(cfg.Enabled)
		scheduler.SetMaxConcurrent(cfg.MaxConcurrent)
		scheduler.SetMinInterval(secondsToDuration(cfg.MinIntervalSeconds))
	}

	// The threshold is logged as a percentage.
	thresholdPercent := cfg.GarbageThreshold * 100
	scanInterval := formatDurationForUser(cfg.ScanIntervalSeconds)
	glog.V(1).Infof("Applied vacuum configuration: enabled=%v, threshold=%.1f%%, scan_interval=%s, max_concurrent=%d",
		cfg.Enabled, thresholdPercent, scanInterval, cfg.MaxConcurrent)
	return nil
}
// getCurrentVacuumConfig assembles the configuration currently in effect.
//
// It starts from built-in defaults (enabled, 0.3 threshold, 30m scan interval,
// 1h minimum volume age, two concurrent tasks, 6h minimum interval) and
// overrides them with the values reported by the detector and scheduler when
// those are non-nil.
func (ui *UIProvider) getCurrentVacuumConfig() *VacuumConfig {
	cfg := new(VacuumConfig)

	// Fallbacks used when the detector/scheduler are nil.
	cfg.Enabled = true
	cfg.GarbageThreshold = 0.3
	cfg.ScanIntervalSeconds = 30 * 60
	cfg.MinVolumeAgeSeconds = 1 * 60 * 60
	cfg.MaxConcurrent = 2
	cfg.MinIntervalSeconds = 6 * 60 * 60

	if detector := ui.detector; detector != nil {
		cfg.Enabled = detector.IsEnabled()
		cfg.GarbageThreshold = detector.GetGarbageThreshold()
		cfg.ScanIntervalSeconds = durationToSeconds(detector.ScanInterval())
		cfg.MinVolumeAgeSeconds = durationToSeconds(detector.GetMinVolumeAge())
	}

	if scheduler := ui.scheduler; scheduler != nil {
		cfg.MaxConcurrent = scheduler.GetMaxConcurrent()
		cfg.MinIntervalSeconds = durationToSeconds(scheduler.GetMinInterval())
	}

	return cfg
}
// RegisterUI builds a UIProvider around detector and scheduler and adds it to
// uiRegistry.
func RegisterUI(uiRegistry *types.UIRegistry, detector *VacuumDetector, scheduler *VacuumScheduler) {
	uiRegistry.RegisterUI(NewUIProvider(detector, scheduler))

	glog.V(1).Infof("✅ Registered vacuum task UI provider")
}
// GetUIProvider looks up the provider registered for types.TaskTypeVacuum in
// uiRegistry and returns it as a *UIProvider. It returns nil when nothing is
// registered or when the registered provider has a different concrete type.
func GetUIProvider(uiRegistry *types.UIRegistry) *UIProvider {
	registered := uiRegistry.GetProvider(types.TaskTypeVacuum)

	// A failed (or nil-interface) assertion leaves vacuumProvider nil.
	vacuumProvider, _ := registered.(*UIProvider)
	return vacuumProvider
}

View File

@@ -0,0 +1,330 @@
package vacuum
import (
"fmt"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/admin/view/components"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// formatDurationFromSeconds renders a number of seconds using
// time.Duration's String format, e.g. 90 becomes "1m30s".
func formatDurationFromSeconds(seconds int) string {
	return (time.Second * time.Duration(seconds)).String()
}
// Helper functions to convert between seconds and value+unit format

// secondsToValueAndUnit splits a duration in seconds into a numeric value and
// a unit name for display.
//
// It picks "days" or "hours" when seconds is an exact positive multiple of
// that unit, and "minutes" otherwise. The minutes value keeps its fractional
// part (90 seconds is 1.5 minutes), so converting it back to seconds does not
// lose sub-minute precision. Zero is reported as 0 "minutes".
func secondsToValueAndUnit(seconds int) (float64, string) {
	const (
		secondsPerMinute = 60
		secondsPerHour   = 3600
		secondsPerDay    = 24 * 3600
	)

	switch {
	case seconds == 0:
		return 0, "minutes"
	case seconds >= secondsPerDay && seconds%secondsPerDay == 0:
		return float64(seconds / secondsPerDay), "days"
	case seconds >= secondsPerHour && seconds%secondsPerHour == 0:
		return float64(seconds / secondsPerHour), "hours"
	default:
		// Divide as floats: integer division would turn 90s into 1 minute
		// and anything under 60s into 0.
		return float64(seconds) / secondsPerMinute, "minutes"
	}
}
// valueAndUnitToSeconds converts a value expressed in unit ("days", "hours"
// or "minutes") to whole seconds, truncating any fraction. An unrecognised
// unit is treated as minutes.
func valueAndUnitToSeconds(value float64, unit string) int {
	var seconds float64
	switch unit {
	case "days":
		seconds = value * 24 * 3600
	case "hours":
		seconds = value * 3600
	default:
		// "minutes" and anything unrecognised.
		seconds = value * 60
	}
	return int(seconds)
}
// UITemplProvider provides the templ-based UI for vacuum task configuration.
// It reads from and writes to the detector and scheduler it was created with;
// either may be nil, in which case the methods that use it skip it.
type UITemplProvider struct {
detector *VacuumDetector
scheduler *VacuumScheduler
}
// NewUITemplProvider returns a UITemplProvider bound to the given detector and
// scheduler.
func NewUITemplProvider(detector *VacuumDetector, scheduler *VacuumScheduler) *UITemplProvider {
	provider := new(UITemplProvider)
	provider.detector = detector
	provider.scheduler = scheduler
	return provider
}
// GetTaskType identifies this provider as serving types.TaskTypeVacuum.
func (ui *UITemplProvider) GetTaskType() types.TaskType {
	return types.TaskTypeVacuum
}
// GetDisplayName returns the label shown to users for the vacuum task.
func (ui *UITemplProvider) GetDisplayName() string {
	const displayName = "Volume Vacuum"
	return displayName
}
// GetDescription returns a one-sentence summary of the vacuum task.
func (ui *UITemplProvider) GetDescription() string {
	const summary = "Reclaims disk space by removing deleted files from volumes"
	return summary
}
// GetIcon returns the CSS class string used to render the vacuum task icon.
func (ui *UITemplProvider) GetIcon() string {
	const iconClass = "fas fa-broom text-primary"
	return iconClass
}
// RenderConfigSections describes the vacuum configuration form as three
// sections of field data: detection settings, scheduling settings and a
// read-only performance note.
//
// The currentConfig argument is not read; field values come from
// getCurrentVacuumConfig. The returned error is always nil.
func (ui *UITemplProvider) RenderConfigSections(currentConfig interface{}) ([]components.ConfigSectionData, error) {
	cfg := ui.getCurrentVacuumConfig()

	// required builds the metadata shared by every mandatory input.
	required := func(name, label, description string) components.FormFieldData {
		return components.FormFieldData{
			Name:        name,
			Label:       label,
			Description: description,
			Required:    true,
		}
	}

	// Inputs that decide when a vacuum task is created.
	detectionFields := []interface{}{
		components.CheckboxFieldData{
			FormFieldData: components.FormFieldData{
				Name:        "enabled",
				Label:       "Enable Vacuum Tasks",
				Description: "Whether vacuum tasks should be automatically created",
			},
			Checked: cfg.Enabled,
		},
		components.NumberFieldData{
			FormFieldData: required(
				"garbage_threshold",
				"Garbage Threshold",
				"Trigger vacuum when garbage ratio exceeds this percentage (0.0-1.0)",
			),
			Value: cfg.GarbageThreshold,
			Step:  "0.01",
			Min:   floatPtr(0.0),
			Max:   floatPtr(1.0),
		},
		components.DurationInputFieldData{
			FormFieldData: required(
				"scan_interval",
				"Scan Interval",
				"How often to scan for volumes needing vacuum",
			),
			Seconds: cfg.ScanIntervalSeconds,
		},
		components.DurationInputFieldData{
			FormFieldData: required(
				"min_volume_age",
				"Minimum Volume Age",
				"Only vacuum volumes older than this duration",
			),
			Seconds: cfg.MinVolumeAgeSeconds,
		},
	}

	// Inputs that control concurrency and spacing of tasks.
	schedulingFields := []interface{}{
		components.NumberFieldData{
			FormFieldData: required(
				"max_concurrent",
				"Max Concurrent Tasks",
				"Maximum number of vacuum tasks that can run simultaneously",
			),
			Value: float64(cfg.MaxConcurrent),
			Step:  "1",
			Min:   floatPtr(1),
		},
		components.DurationInputFieldData{
			FormFieldData: required(
				"min_interval",
				"Minimum Interval",
				"Minimum time between vacuum operations on the same volume",
			),
			Seconds: cfg.MinIntervalSeconds,
		},
	}

	// Informational text only.
	performanceFields := []interface{}{
		components.TextFieldData{
			FormFieldData: components.FormFieldData{
				Name:        "info_impact",
				Label:       "Impact",
				Description: "Volume vacuum operations are I/O intensive and should be scheduled appropriately",
			},
			Value: "Configure thresholds and intervals based on your storage usage patterns",
		},
	}

	sections := []components.ConfigSectionData{
		{
			Title:       "Detection Settings",
			Icon:        "fas fa-search",
			Description: "Configure when vacuum tasks should be triggered",
			Fields:      detectionFields,
		},
		{
			Title:       "Scheduling Settings",
			Icon:        "fas fa-clock",
			Description: "Configure task scheduling and concurrency",
			Fields:      schedulingFields,
		},
		{
			Title:       "Performance Impact",
			Icon:        "fas fa-exclamation-triangle",
			Description: "Important information about vacuum operations",
			Fields:      performanceFields,
		},
	}
	return sections, nil
}
// ParseConfigForm converts a submitted configuration form into a
// *VacuumConfig.
//
// formData maps form field names to their submitted values; only the first
// value of each field is read. Fields that are absent keep the zero value of
// the corresponding config field. Durations arrive as a numeric "<name>" field
// plus an optional "<name>_unit" field, which defaults to "minutes".
//
// It returns an error when a value cannot be parsed, when the garbage
// threshold is not within 0.0-1.0 (including NaN), when a duration is negative
// or NaN, or when max_concurrent is below 1. Parse errors wrap the underlying
// strconv error.
func (ui *UITemplProvider) ParseConfigForm(formData map[string][]string) (interface{}, error) {
	config := &VacuumConfig{}

	// The checkbox counts as enabled only when it was submitted with "on".
	config.Enabled = len(formData["enabled"]) > 0 && formData["enabled"][0] == "on"

	// Garbage threshold
	if raw := formData["garbage_threshold"]; len(raw) > 0 {
		threshold, err := strconv.ParseFloat(raw[0], 64)
		if err != nil {
			return nil, fmt.Errorf("invalid garbage threshold: %w", err)
		}
		// Written as a negated range test so that NaN, which ParseFloat
		// accepts, is rejected too.
		if !(threshold >= 0 && threshold <= 1) {
			return nil, fmt.Errorf("garbage threshold must be between 0.0 and 1.0")
		}
		config.GarbageThreshold = threshold
	}

	// parseDuration reads the value/unit pair for one duration field and
	// returns it in seconds; an absent field yields 0.
	parseDuration := func(name, label string) (int, error) {
		values := formData[name]
		if len(values) == 0 {
			return 0, nil
		}
		value, err := strconv.ParseFloat(values[0], 64)
		if err != nil {
			return 0, fmt.Errorf("invalid %s value: %w", label, err)
		}
		// ParseFloat accepts signed numbers and "NaN"; neither is a usable
		// duration. The negated comparison also catches NaN.
		if !(value >= 0) {
			return 0, fmt.Errorf("%s must not be negative", label)
		}
		unit := "minutes" // default
		if units := formData[name+"_unit"]; len(units) > 0 {
			unit = units[0]
		}
		return valueAndUnitToSeconds(value, unit), nil
	}

	var err error
	if config.ScanIntervalSeconds, err = parseDuration("scan_interval", "scan interval"); err != nil {
		return nil, err
	}
	if config.MinVolumeAgeSeconds, err = parseDuration("min_volume_age", "min volume age"); err != nil {
		return nil, err
	}

	// Max concurrent
	if raw := formData["max_concurrent"]; len(raw) > 0 {
		concurrent, err := strconv.Atoi(raw[0])
		if err != nil {
			return nil, fmt.Errorf("invalid max concurrent: %w", err)
		}
		if concurrent < 1 {
			return nil, fmt.Errorf("max concurrent must be at least 1")
		}
		config.MaxConcurrent = concurrent
	}

	if config.MinIntervalSeconds, err = parseDuration("min_interval", "min interval"); err != nil {
		return nil, err
	}

	return config, nil
}
// GetCurrentConfig exposes the result of getCurrentVacuumConfig as an untyped
// value; the dynamic type is *VacuumConfig.
func (ui *UITemplProvider) GetCurrentConfig() interface{} {
	current := ui.getCurrentVacuumConfig()
	return current
}
// ApplyConfig pushes a *VacuumConfig into the detector and scheduler.
//
// config must hold a *VacuumConfig, otherwise an error is returned and nothing
// is changed. A nil detector or scheduler is skipped.
func (ui *UITemplProvider) ApplyConfig(config interface{}) error {
	cfg, isVacuumConfig := config.(*VacuumConfig)
	if !isVacuumConfig {
		return fmt.Errorf("invalid config type, expected *VacuumConfig")
	}

	// asDuration turns a whole number of seconds into a time.Duration.
	asDuration := func(seconds int) time.Duration {
		return time.Duration(seconds) * time.Second
	}

	// Detector settings.
	if detector := ui.detector; detector != nil {
		detector.SetEnabled(cfg.Enabled)
		detector.SetGarbageThreshold(cfg.GarbageThreshold)
		detector.SetScanInterval(asDuration(cfg.ScanIntervalSeconds))
		detector.SetMinVolumeAge(asDuration(cfg.MinVolumeAgeSeconds))
	}

	// Scheduler settings.
	if scheduler := ui.scheduler; scheduler != nil {
		scheduler.SetEnabled(cfg.Enabled)
		scheduler.SetMaxConcurrent(cfg.MaxConcurrent)
		scheduler.SetMinInterval(asDuration(cfg.MinIntervalSeconds))
	}

	// The threshold is logged as a percentage.
	thresholdPercent := cfg.GarbageThreshold * 100
	scanInterval := formatDurationFromSeconds(cfg.ScanIntervalSeconds)
	glog.V(1).Infof("Applied vacuum configuration: enabled=%v, threshold=%.1f%%, scan_interval=%s, max_concurrent=%d",
		cfg.Enabled, thresholdPercent, scanInterval, cfg.MaxConcurrent)
	return nil
}
// getCurrentVacuumConfig assembles the configuration currently in effect.
//
// It starts from built-in defaults (enabled, 0.3 threshold, 30m scan interval,
// 1h minimum volume age, two concurrent tasks, 6h minimum interval) and
// overrides them with the values reported by the detector and scheduler when
// those are non-nil.
func (ui *UITemplProvider) getCurrentVacuumConfig() *VacuumConfig {
	cfg := new(VacuumConfig)

	// Fallbacks used when the detector/scheduler are nil.
	cfg.Enabled = true
	cfg.GarbageThreshold = 0.3
	cfg.ScanIntervalSeconds = int((30 * time.Minute).Seconds())
	cfg.MinVolumeAgeSeconds = int((1 * time.Hour).Seconds())
	cfg.MaxConcurrent = 2
	cfg.MinIntervalSeconds = int((6 * time.Hour).Seconds())

	if detector := ui.detector; detector != nil {
		cfg.Enabled = detector.IsEnabled()
		cfg.GarbageThreshold = detector.GetGarbageThreshold()
		cfg.ScanIntervalSeconds = int(detector.ScanInterval().Seconds())
		cfg.MinVolumeAgeSeconds = int(detector.GetMinVolumeAge().Seconds())
	}

	if scheduler := ui.scheduler; scheduler != nil {
		cfg.MaxConcurrent = scheduler.GetMaxConcurrent()
		cfg.MinIntervalSeconds = int(scheduler.GetMinInterval().Seconds())
	}

	return cfg
}
// floatPtr returns a pointer to a freshly allocated float64 holding f.
// Each call yields a distinct pointer.
func floatPtr(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}
// RegisterUITempl builds a vacuum templ UI provider around the given detector
// and scheduler and adds it to uiRegistry.
func RegisterUITempl(uiRegistry *types.UITemplRegistry, detector *VacuumDetector, scheduler *VacuumScheduler) {
	uiRegistry.RegisterUI(NewUITemplProvider(detector, scheduler))
	glog.V(1).Infof("✅ Registered vacuum task templ UI provider")
}

View File

@@ -0,0 +1,79 @@
package vacuum
import (
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Task implements vacuum operation to reclaim disk space.
// It embeds *tasks.BaseTask and records which volume on which server the
// vacuum targets.
type Task struct {
*tasks.BaseTask
// server is the server name passed to NewTask; it is only used in log output here.
server string
// volumeID identifies the volume to vacuum.
volumeID uint32
}
// NewTask returns a vacuum Task for volumeID on server, backed by a new
// BaseTask of type types.TaskTypeVacuum.
func NewTask(server string, volumeID uint32) *Task {
	return &Task{
		BaseTask: tasks.NewBaseTask(types.TaskTypeVacuum),
		server:   server,
		volumeID: volumeID,
	}
}
// Execute runs the vacuum task. The current implementation only simulates the
// work: it walks a fixed list of phases, publishing the phase's progress value
// and then sleeping for the phase's duration. Cancellation is checked once
// before each phase; a cancelled task returns an error. params is not used.
func (t *Task) Execute(params types.TaskParams) error {
	glog.Infof("Starting vacuum task for volume %d on server %s", t.volumeID, t.server)

	// phase describes one simulated step of the vacuum.
	type phase struct {
		label    string
		pause    time.Duration
		progress float64
	}
	phases := [...]phase{
		{label: "Scanning volume", pause: 1 * time.Second, progress: 20},
		{label: "Identifying deleted files", pause: 2 * time.Second, progress: 50},
		{label: "Compacting data", pause: 3 * time.Second, progress: 80},
		{label: "Finalizing vacuum", pause: 1 * time.Second, progress: 100},
	}

	for i := 0; i < len(phases); i++ {
		if t.IsCancelled() {
			return fmt.Errorf("vacuum task cancelled")
		}
		current := phases[i]
		glog.V(1).Infof("Vacuum task step: %s", current.label)
		t.SetProgress(current.progress)
		// Stand-in for the real work of this phase.
		time.Sleep(current.pause)
	}

	glog.Infof("Vacuum task completed for volume %d on server %s", t.volumeID, t.server)
	return nil
}
// Validate checks that params names a target: a non-zero VolumeID and a
// non-empty Server. The volume check is performed first.
func (t *Task) Validate(params types.TaskParams) error {
	switch {
	case params.VolumeID == 0:
		return fmt.Errorf("volume_id is required")
	case params.Server == "":
		return fmt.Errorf("server is required")
	}
	return nil
}
// EstimateTime returns the expected duration of a vacuum task. It is currently
// a fixed 25 seconds regardless of params; volume size or usage patterns could
// refine it later.
func (t *Task) EstimateTime(params types.TaskParams) time.Duration {
	const baseline = 25 * time.Second
	return baseline
}

View File

@@ -0,0 +1,132 @@
package vacuum
import (
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// VacuumDetector implements vacuum task detection using code instead of schemas.
// Its fields are plain values read and written without locking.
type VacuumDetector struct {
// enabled gates ScanForTasks; when false no tasks are reported.
enabled bool
// garbageThreshold is the minimum GarbageRatio a volume must reach to be selected.
garbageThreshold float64
// minVolumeAge is the minimum Age a volume must reach to be selected.
minVolumeAge time.Duration
// scanInterval is the value reported by ScanInterval.
scanInterval time.Duration
}
// Compile-time interface assertions: the build fails if *VacuumDetector stops
// satisfying either interface.
var (
_ types.TaskDetector = (*VacuumDetector)(nil)
_ types.PolicyConfigurableDetector = (*VacuumDetector)(nil)
)
// NewVacuumDetector returns an enabled detector with the default settings:
// garbage threshold 0.3, minimum volume age 24h, scan interval 30m.
func NewVacuumDetector() *VacuumDetector {
	d := new(VacuumDetector)
	d.enabled = true
	d.garbageThreshold = 0.3
	d.minVolumeAge = 24 * time.Hour
	d.scanInterval = 30 * time.Minute
	return d
}
// GetTaskType returns the task type handled by this detector, which is always
// types.TaskTypeVacuum.
func (d *VacuumDetector) GetTaskType() types.TaskType {
return types.TaskTypeVacuum
}
// ScanForTasks inspects volumeMetrics and returns one detection result for
// every volume whose garbage ratio and age both reach the configured minimums.
//
// A disabled detector returns (nil, nil). Volumes with a garbage ratio above
// 0.6 are reported with high priority, all others with normal priority.
// clusterInfo is not consulted. The returned error is always nil.
func (d *VacuumDetector) ScanForTasks(volumeMetrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterInfo) ([]*types.TaskDetectionResult, error) {
	if !d.enabled {
		return nil, nil
	}

	var found []*types.TaskDetectionResult
	for _, vm := range volumeMetrics {
		// Both conditions must hold for the volume to need a vacuum.
		eligible := vm.GarbageRatio >= d.garbageThreshold && vm.Age >= d.minVolumeAge
		if !eligible {
			continue
		}

		// More garbage means the vacuum is more urgent.
		var priority types.TaskPriority
		if vm.GarbageRatio > 0.6 {
			priority = types.TaskPriorityHigh
		} else {
			priority = types.TaskPriorityNormal
		}

		details := map[string]interface{}{
			"garbage_ratio": vm.GarbageRatio,
			"volume_age":    vm.Age.String(),
		}
		found = append(found, &types.TaskDetectionResult{
			TaskType:   types.TaskTypeVacuum,
			VolumeID:   vm.VolumeID,
			Server:     vm.Server,
			Collection: vm.Collection,
			Priority:   priority,
			Reason:     "Volume has excessive garbage requiring vacuum",
			Parameters: details,
			ScheduleAt: time.Now(),
		})
	}

	glog.V(2).Infof("Vacuum detector found %d volumes needing vacuum", len(found))
	return found, nil
}
// ScanInterval returns how often this detector should scan (the stored
// scanInterval value).
func (d *VacuumDetector) ScanInterval() time.Duration {
return d.scanInterval
}
// IsEnabled returns whether this detector is enabled. A disabled detector
// reports no tasks from ScanForTasks.
func (d *VacuumDetector) IsEnabled() bool {
return d.enabled
}
// Configuration setters. Each one stores its argument as-is; no validation or
// locking is performed.

// SetEnabled turns detection on or off.
func (d *VacuumDetector) SetEnabled(enabled bool) {
d.enabled = enabled
}
// SetGarbageThreshold sets the minimum garbage ratio a volume must reach to be
// selected by ScanForTasks.
func (d *VacuumDetector) SetGarbageThreshold(threshold float64) {
d.garbageThreshold = threshold
}
// SetScanInterval sets the value returned by ScanInterval and GetScanInterval.
func (d *VacuumDetector) SetScanInterval(interval time.Duration) {
d.scanInterval = interval
}
// SetMinVolumeAge sets the minimum age a volume must reach to be selected by
// ScanForTasks.
func (d *VacuumDetector) SetMinVolumeAge(age time.Duration) {
d.minVolumeAge = age
}
// GetGarbageThreshold returns the current garbage threshold.
func (d *VacuumDetector) GetGarbageThreshold() float64 {
return d.garbageThreshold
}
// GetMinVolumeAge returns the minimum volume age.
func (d *VacuumDetector) GetMinVolumeAge() time.Duration {
return d.minVolumeAge
}
// GetScanInterval returns the scan interval. It returns the same field as
// ScanInterval.
func (d *VacuumDetector) GetScanInterval() time.Duration {
return d.scanInterval
}
// ConfigureFromPolicy copies the enabled flag and garbage ratio from policy
// into the detector. policy must provide GetVacuumEnabled and
// GetVacuumGarbageRatio; any other value is ignored and a message is logged at
// verbosity 1.
func (d *VacuumDetector) ConfigureFromPolicy(policy interface{}) {
	// vacuumPolicy is the minimal method set this detector needs from a policy.
	type vacuumPolicy interface {
		GetVacuumEnabled() bool
		GetVacuumGarbageRatio() float64
	}

	source, supported := policy.(vacuumPolicy)
	if !supported {
		glog.V(1).Infof("Could not configure vacuum detector from policy: unsupported policy type")
		return
	}
	d.SetEnabled(source.GetVacuumEnabled())
	d.SetGarbageThreshold(source.GetVacuumGarbageRatio())
}

View File

@@ -0,0 +1,81 @@
package vacuum
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// Factory creates vacuum task instances.
// It embeds *tasks.BaseTaskFactory, which is populated by NewFactory.
type Factory struct {
*tasks.BaseTaskFactory
}
// NewFactory returns a vacuum task Factory whose base factory is registered
// for types.TaskTypeVacuum with the "vacuum" and "storage" labels.
func NewFactory() *Factory {
	base := tasks.NewBaseTaskFactory(
		types.TaskTypeVacuum,
		[]string{"vacuum", "storage"},
		"Vacuum operation to reclaim disk space by removing deleted files",
	)
	return &Factory{BaseTaskFactory: base}
}
// Create builds a vacuum task for the volume and server named in params and
// stamps it with its estimated duration. It returns an error when params has a
// zero VolumeID or an empty Server.
func (f *Factory) Create(params types.TaskParams) (types.TaskInterface, error) {
	// Reject incomplete parameters before constructing anything.
	switch {
	case params.VolumeID == 0:
		return nil, fmt.Errorf("volume_id is required")
	case params.Server == "":
		return nil, fmt.Errorf("server is required")
	}

	vacuumTask := NewTask(params.Server, params.VolumeID)
	estimate := vacuumTask.EstimateTime(params)
	vacuumTask.SetEstimatedDuration(estimate)
	return vacuumTask, nil
}
// Shared detector and scheduler instances.
// Both start nil and are created on first use by getSharedInstances.
var (
sharedDetector *VacuumDetector
sharedScheduler *VacuumScheduler
)
// getSharedInstances returns the package-wide detector and scheduler, creating
// whichever one does not exist yet. No locking is performed.
func getSharedInstances() (detector *VacuumDetector, scheduler *VacuumScheduler) {
	if detector = sharedDetector; detector == nil {
		detector = NewVacuumDetector()
		sharedDetector = detector
	}
	if scheduler = sharedScheduler; scheduler == nil {
		scheduler = NewVacuumScheduler()
		sharedScheduler = scheduler
	}
	return detector, scheduler
}
// GetSharedInstances returns the shared detector and scheduler instances (public access).
// It is a thin exported wrapper around getSharedInstances.
func GetSharedInstances() (*VacuumDetector, *VacuumScheduler) {
return getSharedInstances()
}
// Auto-register this task when the package is imported
func init() {
factory := NewFactory()
tasks.AutoRegister(types.TaskTypeVacuum, factory)
// Get shared instances for all registrations
detector, scheduler := getSharedInstances()
// Register with types registry
tasks.AutoRegisterTypes(func(registry *types.TaskRegistry) {
registry.RegisterTask(detector, scheduler)
})
// Register with UI registry using the same instances
tasks.AutoRegisterUI(func(uiRegistry *types.UIRegistry) {
RegisterUI(uiRegistry, detector, scheduler)
})
}

View File

@@ -0,0 +1,111 @@
package vacuum
import (
"time"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
)
// VacuumScheduler implements vacuum task scheduling using code instead of schemas.
// Its fields are plain values read and written without locking.
type VacuumScheduler struct {
// enabled gates CanScheduleNow; when false nothing is scheduled.
enabled bool
// maxConcurrent caps the number of vacuum tasks running at once.
maxConcurrent int
// minInterval is reported by GetDefaultRepeatInterval and GetMinInterval.
minInterval time.Duration
}
// Compile-time interface assertions: the build fails if *VacuumScheduler stops
// satisfying types.TaskScheduler.
var (
_ types.TaskScheduler = (*VacuumScheduler)(nil)
)
// NewVacuumScheduler returns an enabled scheduler with the default settings:
// at most 2 concurrent vacuum tasks and a 6h minimum interval.
func NewVacuumScheduler() *VacuumScheduler {
	s := new(VacuumScheduler)
	s.enabled = true
	s.maxConcurrent = 2
	s.minInterval = 6 * time.Hour
	return s
}
// GetTaskType returns the task type handled by this scheduler, which is always
// types.TaskTypeVacuum.
func (s *VacuumScheduler) GetTaskType() types.TaskType {
return types.TaskTypeVacuum
}
// CanScheduleNow reports whether another vacuum task may start right now.
// It requires all of the following:
//   - the scheduler is enabled,
//   - fewer than maxConcurrent vacuum tasks appear in runningTasks,
//   - some worker in availableWorkers has spare capacity and lists the vacuum
//     capability.
//
// The task argument itself is not inspected.
func (s *VacuumScheduler) CanScheduleNow(task *types.Task, runningTasks []*types.Task, availableWorkers []*types.Worker) bool {
	if !s.enabled {
		return false
	}

	// Count vacuum tasks already in flight.
	active := 0
	for i := range runningTasks {
		if runningTasks[i].Type == types.TaskTypeVacuum {
			active++
		}
	}
	if active >= s.maxConcurrent {
		return false
	}

	// Look for any worker that is both free and vacuum-capable.
	for _, w := range availableWorkers {
		if !(w.CurrentLoad < w.MaxConcurrent) {
			continue
		}
		for _, supported := range w.Capabilities {
			if supported == types.TaskTypeVacuum {
				return true
			}
		}
	}
	return false
}
// GetPriority returns the scheduling priority for task. A task whose
// "garbage_ratio" parameter is a float64 above 0.8 is promoted to high
// priority; otherwise the task's own Priority is returned unchanged.
func (s *VacuumScheduler) GetPriority(task *types.Task) types.TaskPriority {
	ratio, isFloat := task.Parameters["garbage_ratio"].(float64)
	if isFloat && ratio > 0.8 {
		return types.TaskPriorityHigh
	}
	return task.Priority
}
// GetMaxConcurrent returns max concurrent tasks of this type.
func (s *VacuumScheduler) GetMaxConcurrent() int {
return s.maxConcurrent
}
// GetDefaultRepeatInterval returns the default interval to wait before repeating vacuum tasks.
// It returns the same minInterval field as GetMinInterval.
func (s *VacuumScheduler) GetDefaultRepeatInterval() time.Duration {
return s.minInterval
}
// IsEnabled returns whether this scheduler is enabled. A disabled scheduler
// never allows scheduling from CanScheduleNow.
func (s *VacuumScheduler) IsEnabled() bool {
return s.enabled
}
// Configuration setters. Each one stores its argument as-is; no validation or
// locking is performed.

// SetEnabled turns scheduling on or off.
func (s *VacuumScheduler) SetEnabled(enabled bool) {
s.enabled = enabled
}
// SetMaxConcurrent sets the cap on simultaneously running vacuum tasks used by
// CanScheduleNow.
func (s *VacuumScheduler) SetMaxConcurrent(max int) {
s.maxConcurrent = max
}
// SetMinInterval sets the interval returned by GetMinInterval and
// GetDefaultRepeatInterval.
func (s *VacuumScheduler) SetMinInterval(interval time.Duration) {
s.minInterval = interval
}
// GetMinInterval returns the minimum interval, as last set by SetMinInterval
// (6h for a scheduler from NewVacuumScheduler).
func (s *VacuumScheduler) GetMinInterval() time.Duration {
return s.minInterval
}

View File

@@ -0,0 +1,268 @@
package types
import (
"sync"
"time"
)
// WorkerConfig represents the configuration for a worker.
// DefaultWorkerConfig provides the default values.
type WorkerConfig struct {
// AdminServer is the admin server address (default "localhost:9333").
AdminServer string `json:"admin_server"`
// Capabilities lists the task types this worker can run.
Capabilities []TaskType `json:"capabilities"`
// MaxConcurrent is the default-2 concurrency setting; presumably the number
// of tasks the worker runs at once — confirm against the worker code.
MaxConcurrent int `json:"max_concurrent"`
// HeartbeatInterval defaults to 30 seconds.
HeartbeatInterval time.Duration `json:"heartbeat_interval"`
// TaskRequestInterval defaults to 5 seconds.
TaskRequestInterval time.Duration `json:"task_request_interval"`
// CustomParameters holds free-form extra settings; omitted from JSON when empty.
CustomParameters map[string]interface{} `json:"custom_parameters,omitempty"`
}
// MaintenanceConfig represents the configuration for the maintenance system.
// DefaultMaintenanceConfig provides the default values.
// NOTE(review): the time.Duration fields are encoded by encoding/json as
// integer nanoseconds — confirm that is what config consumers expect.
type MaintenanceConfig struct {
// Enabled defaults to true.
Enabled bool `json:"enabled"`
// ScanInterval defaults to 30 minutes.
ScanInterval time.Duration `json:"scan_interval"`
// CleanInterval defaults to 6 hours.
CleanInterval time.Duration `json:"clean_interval"`
// TaskRetention defaults to 7 days.
TaskRetention time.Duration `json:"task_retention"`
// WorkerTimeout defaults to 5 minutes.
WorkerTimeout time.Duration `json:"worker_timeout"`
// Policy holds per-task and global maintenance settings.
Policy *MaintenancePolicy `json:"policy"`
}
// MaintenancePolicy represents policies for maintenance operations.
// This is now dynamic - task configurations are stored by task type.
type MaintenancePolicy struct {
// Task-specific configurations indexed by task type. The helper methods on
// this type only understand values of type map[string]interface{}.
TaskConfigs map[TaskType]interface{} `json:"task_configs"`
// Global maintenance settings. IsTaskEnabled, GetMaxConcurrent and
// GetScanInterval dereference this pointer without a nil check.
GlobalSettings *GlobalMaintenanceSettings `json:"global_settings"`
}
// GlobalMaintenanceSettings contains settings that apply to all tasks.
// NewMaintenancePolicy provides the default values noted below.
type GlobalMaintenanceSettings struct {
// DefaultMaxConcurrent is the fallback for MaintenancePolicy.GetMaxConcurrent (default 2).
DefaultMaxConcurrent int `json:"default_max_concurrent"`
// MaintenanceEnabled is the master switch checked by MaintenancePolicy.IsTaskEnabled (default true).
MaintenanceEnabled bool `json:"maintenance_enabled"`
// Global timing settings
// DefaultScanInterval is the fallback for MaintenancePolicy.GetScanInterval (default 30 minutes).
DefaultScanInterval time.Duration `json:"default_scan_interval"`
// DefaultTaskTimeout defaults to 5 minutes.
DefaultTaskTimeout time.Duration `json:"default_task_timeout"`
// DefaultRetryCount defaults to 3.
DefaultRetryCount int `json:"default_retry_count"`
// DefaultRetryInterval defaults to 5 minutes.
DefaultRetryInterval time.Duration `json:"default_retry_interval"`
// Global thresholds
// DefaultPriorityBoostAge defaults to 24 hours.
DefaultPriorityBoostAge time.Duration `json:"default_priority_boost_age"`
// GlobalConcurrentLimit defaults to 5.
GlobalConcurrentLimit int `json:"global_concurrent_limit"`
}
// MaintenanceStats represents statistics for the maintenance system.
// It carries task counters, per-status and per-type breakdowns, and the
// previous and next scan times.
type MaintenanceStats struct {
TotalTasks int `json:"total_tasks"`
CompletedToday int `json:"completed_today"`
FailedToday int `json:"failed_today"`
ActiveWorkers int `json:"active_workers"`
AverageTaskTime time.Duration `json:"average_task_time"`
// TasksByStatus counts tasks keyed by TaskStatus.
TasksByStatus map[TaskStatus]int `json:"tasks_by_status"`
// TasksByType counts tasks keyed by TaskType.
TasksByType map[TaskType]int `json:"tasks_by_type"`
LastScanTime time.Time `json:"last_scan_time"`
NextScanTime time.Time `json:"next_scan_time"`
}
// QueueStats represents statistics for the task queue: one counter per task
// status plus the number of active workers.
type QueueStats struct {
PendingTasks int `json:"pending_tasks"`
AssignedTasks int `json:"assigned_tasks"`
InProgressTasks int `json:"in_progress_tasks"`
CompletedTasks int `json:"completed_tasks"`
FailedTasks int `json:"failed_tasks"`
CancelledTasks int `json:"cancelled_tasks"`
ActiveWorkers int `json:"active_workers"`
}
// MaintenanceConfigData represents the complete maintenance configuration data:
// the configuration itself together with scan timing and system statistics.
type MaintenanceConfigData struct {
Config *MaintenanceConfig `json:"config"`
IsEnabled bool `json:"is_enabled"`
LastScanTime time.Time `json:"last_scan_time"`
NextScanTime time.Time `json:"next_scan_time"`
SystemStats *MaintenanceStats `json:"system_stats"`
}
// MaintenanceQueueData represents data for the maintenance queue UI: the task
// and worker lists, queue counters, and the time the data was last updated.
type MaintenanceQueueData struct {
Tasks []*Task `json:"tasks"`
Workers []*Worker `json:"workers"`
Stats *QueueStats `json:"stats"`
LastUpdated time.Time `json:"last_updated"`
}
// MaintenanceWorkersData represents data for the maintenance workers UI:
// per-worker details, aggregate worker counters, and the time the data was
// last updated.
type MaintenanceWorkersData struct {
Workers []*WorkerDetailsData `json:"workers"`
ActiveWorkers int `json:"active_workers"`
BusyWorkers int `json:"busy_workers"`
TotalLoad int `json:"total_load"`
LastUpdated time.Time `json:"last_updated"`
}
// defaultCapabilities holds the default capabilities for workers.
// It is nil until SetDefaultCapabilities is called.
var defaultCapabilities []TaskType
// defaultCapabilitiesMutex guards defaultCapabilities.
var defaultCapabilitiesMutex sync.RWMutex
// SetDefaultCapabilities replaces the default worker capabilities with a copy
// of capabilities, so later changes to the caller's slice have no effect.
// This should be called after task registration is complete.
func SetDefaultCapabilities(capabilities []TaskType) {
	// Build the private copy first; only the final swap needs the lock.
	snapshot := make([]TaskType, len(capabilities))
	copy(snapshot, capabilities)

	defaultCapabilitiesMutex.Lock()
	defaultCapabilities = snapshot
	defaultCapabilitiesMutex.Unlock()
}
// GetDefaultCapabilities returns a copy of the default worker capabilities.
// The result is always non-nil and may be modified freely by the caller.
func GetDefaultCapabilities() []TaskType {
	defaultCapabilitiesMutex.RLock()
	defer defaultCapabilitiesMutex.RUnlock()

	out := make([]TaskType, 0, len(defaultCapabilities))
	out = append(out, defaultCapabilities...)
	return out
}
// DefaultMaintenanceConfig returns an enabled maintenance configuration with a
// 30 minute scan interval, 6 hour clean interval, 7 day task retention,
// 5 minute worker timeout, and a fresh default policy.
func DefaultMaintenanceConfig() *MaintenanceConfig {
	const day = 24 * time.Hour

	cfg := new(MaintenanceConfig)
	cfg.Enabled = true
	cfg.ScanInterval = 30 * time.Minute
	cfg.CleanInterval = 6 * time.Hour
	cfg.TaskRetention = 7 * day
	cfg.WorkerTimeout = 5 * time.Minute
	cfg.Policy = NewMaintenancePolicy()
	return cfg
}
// DefaultWorkerConfig returns a worker configuration pointing at
// "localhost:9333", with concurrency 2, a 30 second heartbeat, a 5 second task
// request interval, and the capabilities currently registered as defaults.
func DefaultWorkerConfig() *WorkerConfig {
	cfg := new(WorkerConfig)
	// Capabilities are dynamic: they come from whatever was registered via
	// SetDefaultCapabilities.
	cfg.Capabilities = GetDefaultCapabilities()
	cfg.AdminServer = "localhost:9333"
	cfg.MaxConcurrent = 2
	cfg.HeartbeatInterval = 30 * time.Second
	cfg.TaskRequestInterval = 5 * time.Second
	return cfg
}
// NewMaintenancePolicy returns a policy with no task configurations and the
// default global settings.
func NewMaintenancePolicy() *MaintenancePolicy {
	globals := &GlobalMaintenanceSettings{
		DefaultMaxConcurrent:    2,
		MaintenanceEnabled:      true,
		DefaultScanInterval:     30 * time.Minute,
		DefaultTaskTimeout:      5 * time.Minute,
		DefaultRetryCount:       3,
		DefaultRetryInterval:    5 * time.Minute,
		DefaultPriorityBoostAge: 24 * time.Hour,
		GlobalConcurrentLimit:   5,
	}
	return &MaintenancePolicy{
		TaskConfigs:    map[TaskType]interface{}{},
		GlobalSettings: globals,
	}
}
// SetTaskConfig stores config as the configuration for taskType, replacing any
// previous entry. The TaskConfigs map is created on demand.
func (p *MaintenancePolicy) SetTaskConfig(taskType TaskType, config interface{}) {
	if p.TaskConfigs != nil {
		p.TaskConfigs[taskType] = config
		return
	}
	p.TaskConfigs = map[TaskType]interface{}{taskType: config}
}
// GetTaskConfig returns the configuration stored for taskType, or nil when
// there is none.
func (p *MaintenancePolicy) GetTaskConfig(taskType TaskType) interface{} {
	// Reading from a nil map is legal and yields the zero value, so an unset
	// TaskConfigs needs no special case.
	return p.TaskConfigs[taskType]
}
// IsTaskEnabled reports whether taskType is enabled (generic helper).
//
// It returns false when maintenance is globally disabled or when no
// configuration exists for the task type. If the configuration is a
// map[string]interface{} with a boolean "enabled" entry, that value decides.
// Any other configuration shape falls back to the global setting.
func (p *MaintenancePolicy) IsTaskEnabled(taskType TaskType) bool {
	if !p.GlobalSettings.MaintenanceEnabled {
		return false
	}

	raw := p.GetTaskConfig(taskType)
	if raw == nil {
		return false
	}

	if fields, isMap := raw.(map[string]interface{}); isMap {
		// A missing key yields nil, which fails the bool assertion too.
		if flag, isBool := fields["enabled"].(bool); isBool {
			return flag
		}
	}

	// Could not tell from the task config; defer to the global switch.
	return p.GlobalSettings.MaintenanceEnabled
}
// GetMaxConcurrent returns the "max_concurrent" value from the task type's
// configuration when that configuration is a map[string]interface{} and the
// value is an int or a float64 (truncated to int). In every other case it
// returns the global DefaultMaxConcurrent.
func (p *MaintenancePolicy) GetMaxConcurrent(taskType TaskType) int {
	if fields, isMap := p.GetTaskConfig(taskType).(map[string]interface{}); isMap {
		switch limit := fields["max_concurrent"].(type) {
		case int:
			return limit
		case float64:
			return int(limit)
		}
	}
	return p.GlobalSettings.DefaultMaxConcurrent
}
// GetScanInterval returns the "scan_interval" value from the task type's
// configuration when that configuration is a map[string]interface{} and the
// value is either a time.Duration or a string accepted by time.ParseDuration.
// In every other case it returns the global DefaultScanInterval.
func (p *MaintenancePolicy) GetScanInterval(taskType TaskType) time.Duration {
	if fields, isMap := p.GetTaskConfig(taskType).(map[string]interface{}); isMap {
		switch interval := fields["scan_interval"].(type) {
		case time.Duration:
			return interval
		case string:
			// An unparsable string falls through to the global default.
			if parsed, err := time.ParseDuration(interval); err == nil {
				return parsed
			}
		}
	}
	return p.GlobalSettings.DefaultScanInterval
}
// GetAllTaskTypes returns every task type that has a configuration entry, in
// unspecified order. The result is never nil.
func (p *MaintenancePolicy) GetAllTaskTypes() []TaskType {
	// len and range both treat a nil map as empty, so an unset TaskConfigs
	// yields an empty, non-nil slice.
	configured := make([]TaskType, 0, len(p.TaskConfigs))
	for key := range p.TaskConfigs {
		configured = append(configured, key)
	}
	return configured
}

View File

@@ -0,0 +1,40 @@
package types
import (
"time"
)
// ClusterInfo contains cluster information for task detection: the known
// volume servers, total counts, and when the data was last updated.
type ClusterInfo struct {
Servers []*VolumeServerInfo
TotalVolumes int
TotalServers int
LastUpdated time.Time
}
// VolumeHealthMetrics contains health information about a volume (simplified).
// Detectors receive a slice of these in ScanForTasks.
type VolumeHealthMetrics struct {
VolumeID uint32
Server string
Collection string
// Size and DeletedBytes are presumably byte counts — TODO confirm units with the producer.
Size uint64
DeletedBytes uint64
// GarbageRatio is compared against fractional thresholds such as 0.3 by the
// vacuum detector.
GarbageRatio float64
LastModified time.Time
// Age is compared against the vacuum detector's minimum volume age.
Age time.Duration
ReplicaCount int
ExpectedReplicas int
IsReadOnly bool
HasRemoteCopy bool
IsECVolume bool
FullnessRatio float64
}
// VolumeServerInfo contains information about a volume server (simplified):
// its address, volume count, space usage, and whether it is active.
type VolumeServerInfo struct {
Address string
Volumes int
// UsedSpace and FreeSpace are presumably byte counts — TODO confirm units.
UsedSpace uint64
FreeSpace uint64
IsActive bool
}

View File

@@ -0,0 +1,28 @@
package types
import (
"time"
)
// TaskDetector defines the interface for task detection.
// One detector handles one task type and is registered in a TaskRegistry
// together with the TaskScheduler for the same type.
type TaskDetector interface {
// GetTaskType returns the task type this detector handles
GetTaskType() TaskType
// ScanForTasks scans for tasks that need to be executed
ScanForTasks(volumeMetrics []*VolumeHealthMetrics, clusterInfo *ClusterInfo) ([]*TaskDetectionResult, error)
// ScanInterval returns how often this detector should scan
ScanInterval() time.Duration
// IsEnabled returns whether this detector is enabled
IsEnabled() bool
}
// PolicyConfigurableDetector defines the interface for detectors that can be configured from policy.
// It extends TaskDetector with a single configuration hook.
type PolicyConfigurableDetector interface {
TaskDetector
// ConfigureFromPolicy configures the detector based on the maintenance policy.
// policy is untyped; each implementation decides which policy shapes it accepts.
ConfigureFromPolicy(policy interface{})
}

View File

@@ -0,0 +1,54 @@
package types
// TaskRegistry manages task detectors and schedulers, keyed by task type.
// The maps are accessed without locking. Use NewTaskRegistry to obtain a
// registry with initialized maps.
type TaskRegistry struct {
detectors map[TaskType]TaskDetector
schedulers map[TaskType]TaskScheduler
}
// NewTaskRegistry returns an empty registry ready to accept registrations.
func NewTaskRegistry() *TaskRegistry {
	reg := new(TaskRegistry)
	reg.detectors = map[TaskType]TaskDetector{}
	reg.schedulers = map[TaskType]TaskScheduler{}
	return reg
}
// RegisterTask stores detector and scheduler under the task type they both
// report, replacing any earlier registration for that type. It panics when the
// two report different task types.
func (r *TaskRegistry) RegisterTask(detector TaskDetector, scheduler TaskScheduler) {
	detectorType, schedulerType := detector.GetTaskType(), scheduler.GetTaskType()
	if detectorType != schedulerType {
		panic("detector and scheduler task types must match")
	}
	r.detectors[detectorType] = detector
	r.schedulers[detectorType] = scheduler
}
// GetDetector returns the detector for a task type, or nil when none is
// registered for it.
func (r *TaskRegistry) GetDetector(taskType TaskType) TaskDetector {
return r.detectors[taskType]
}
// GetScheduler returns the scheduler for a task type, or nil when none is
// registered for it.
func (r *TaskRegistry) GetScheduler(taskType TaskType) TaskScheduler {
return r.schedulers[taskType]
}
// GetAllDetectors returns a new map holding every registered detector.
// Modifying the returned map does not affect the registry.
func (r *TaskRegistry) GetAllDetectors() map[TaskType]TaskDetector {
	snapshot := make(map[TaskType]TaskDetector, len(r.detectors))
	for taskType, det := range r.detectors {
		snapshot[taskType] = det
	}
	return snapshot
}
// GetAllSchedulers returns a new map holding every registered scheduler.
// Modifying the returned map does not affect the registry.
func (r *TaskRegistry) GetAllSchedulers() map[TaskType]TaskScheduler {
	snapshot := make(map[TaskType]TaskScheduler, len(r.schedulers))
	for taskType, sched := range r.schedulers {
		snapshot[taskType] = sched
	}
	return snapshot
}

View File

@@ -0,0 +1,32 @@
package types
import "time"
// TaskScheduler defines the interface for task scheduling.
// One scheduler handles one task type and is registered in a TaskRegistry
// together with the TaskDetector for the same type.
type TaskScheduler interface {
// GetTaskType returns the task type this scheduler handles
GetTaskType() TaskType
// CanScheduleNow determines if a task can be scheduled now
CanScheduleNow(task *Task, runningTasks []*Task, availableWorkers []*Worker) bool
// GetPriority returns the priority for tasks of this type
GetPriority(task *Task) TaskPriority
// GetMaxConcurrent returns the maximum concurrent tasks of this type
GetMaxConcurrent() int
// GetDefaultRepeatInterval returns the default interval to wait before repeating tasks of this type
GetDefaultRepeatInterval() time.Duration
// IsEnabled returns whether this scheduler is enabled
IsEnabled() bool
}
// PolicyConfigurableScheduler defines the interface for schedulers that can be configured from policy.
// It extends TaskScheduler with a single configuration hook.
type PolicyConfigurableScheduler interface {
TaskScheduler
// ConfigureFromPolicy configures the scheduler based on the maintenance policy.
// policy is untyped; each implementation decides which policy shapes it accepts.
ConfigureFromPolicy(policy interface{})
}

View File

@@ -0,0 +1,89 @@
package types
import (
"time"
)
// TaskType represents the type of maintenance task.
// It is a string type, so values appear verbatim in JSON and as map keys.
type TaskType string
// Task types declared in this file.
const (
TaskTypeVacuum TaskType = "vacuum"
TaskTypeErasureCoding TaskType = "erasure_coding"
TaskTypeBalance TaskType = "balance"
)
// TaskStatus represents the status of a maintenance task.
type TaskStatus string
// Task statuses declared in this file.
const (
TaskStatusPending TaskStatus = "pending"
TaskStatusAssigned TaskStatus = "assigned"
TaskStatusInProgress TaskStatus = "in_progress"
TaskStatusCompleted TaskStatus = "completed"
TaskStatusFailed TaskStatus = "failed"
TaskStatusCancelled TaskStatus = "cancelled"
)
// TaskPriority represents the priority of a maintenance task.
type TaskPriority int
// Predefined priority levels. The numeric gaps leave room for intermediate
// values; presumably a larger number means more urgent — confirm against the
// queue ordering code.
const (
TaskPriorityLow TaskPriority = 1
TaskPriorityNormal TaskPriority = 5
TaskPriorityHigh TaskPriority = 10
)
// Task represents a maintenance task, including its target, lifecycle state,
// timestamps and retry bookkeeping.
type Task struct {
ID string `json:"id"`
Type TaskType `json:"type"`
Status TaskStatus `json:"status"`
Priority TaskPriority `json:"priority"`
// VolumeID, Server and Collection identify the target; each is omitted from
// JSON when empty.
VolumeID uint32 `json:"volume_id,omitempty"`
Server string `json:"server,omitempty"`
Collection string `json:"collection,omitempty"`
WorkerID string `json:"worker_id,omitempty"`
// Progress: the vacuum task reports values from 20 to 100, which suggests a
// 0-100 percentage scale — TODO confirm for other task types.
Progress float64 `json:"progress"`
Error string `json:"error,omitempty"`
// Parameters carries free-form, task-specific values (for example the
// vacuum scheduler reads "garbage_ratio").
Parameters map[string]interface{} `json:"parameters,omitempty"`
CreatedAt time.Time `json:"created_at"`
ScheduledAt time.Time `json:"scheduled_at"`
// StartedAt and CompletedAt are pointers so they can be nil and are then
// omitted from JSON.
StartedAt *time.Time `json:"started_at,omitempty"`
CompletedAt *time.Time `json:"completed_at,omitempty"`
RetryCount int `json:"retry_count"`
MaxRetries int `json:"max_retries"`
}
// TaskParams represents parameters for task execution.
// Every field is omitted from JSON when empty.
type TaskParams struct {
VolumeID uint32 `json:"volume_id,omitempty"`
Server string `json:"server,omitempty"`
Collection string `json:"collection,omitempty"`
// Parameters carries free-form, task-specific values.
Parameters map[string]interface{} `json:"parameters,omitempty"`
}
// TaskDetectionResult represents the result of scanning for maintenance needs.
// Detectors return one of these per target that needs work.
type TaskDetectionResult struct {
TaskType TaskType `json:"task_type"`
VolumeID uint32 `json:"volume_id,omitempty"`
Server string `json:"server,omitempty"`
Collection string `json:"collection,omitempty"`
Priority TaskPriority `json:"priority"`
// Reason is a human-readable explanation of why the task was proposed.
Reason string `json:"reason"`
// Parameters carries free-form, detector-specific details.
Parameters map[string]interface{} `json:"parameters,omitempty"`
// ScheduleAt: the vacuum detector sets this to time.Now() when it creates the result.
ScheduleAt time.Time `json:"schedule_at"`
}
// ClusterReplicationTask represents the parameters of a cluster replication
// task: source, target, mode, and file metadata.
type ClusterReplicationTask struct {
SourcePath string `json:"source_path"`
TargetCluster string `json:"target_cluster"`
TargetPath string `json:"target_path"`
ReplicationMode string `json:"replication_mode"` // "sync", "async", "backup"
// Priority is a plain int here, not a TaskPriority.
Priority int `json:"priority"`
Checksum string `json:"checksum,omitempty"`
FileSize int64 `json:"file_size"`
CreatedAt time.Time `json:"created_at"`
Metadata map[string]string `json:"metadata,omitempty"`
}

View File

@@ -0,0 +1,281 @@
package types
import (
"fmt"
"html/template"
"time"
)
// TaskUIProvider defines how tasks provide their configuration UI.
// Configuration values cross this interface as interface{}; each
// implementation works with its own concrete config type.
type TaskUIProvider interface {
// GetTaskType returns the task type
GetTaskType() TaskType
// GetDisplayName returns the human-readable name
GetDisplayName() string
// GetDescription returns a description of what this task does
GetDescription() string
// GetIcon returns the icon CSS class or HTML for this task type
GetIcon() string
// RenderConfigForm renders the configuration form HTML
RenderConfigForm(currentConfig interface{}) (template.HTML, error)
// ParseConfigForm parses form data into configuration.
// formData maps a field name to its submitted values.
ParseConfigForm(formData map[string][]string) (interface{}, error)
// GetCurrentConfig returns the current configuration
GetCurrentConfig() interface{}
// ApplyConfig applies the new configuration
ApplyConfig(config interface{}) error
}
// TaskStats represents runtime statistics for a task type: identity, scan
// timing, task counters, and the concurrency and scan settings in effect.
type TaskStats struct {
TaskType TaskType `json:"task_type"`
DisplayName string `json:"display_name"`
Enabled bool `json:"enabled"`
LastScan time.Time `json:"last_scan"`
NextScan time.Time `json:"next_scan"`
PendingTasks int `json:"pending_tasks"`
RunningTasks int `json:"running_tasks"`
CompletedToday int `json:"completed_today"`
FailedToday int `json:"failed_today"`
MaxConcurrent int `json:"max_concurrent"`
ScanInterval time.Duration `json:"scan_interval"`
}
// UIRegistry manages task UI providers, keyed by task type.
// The map is accessed without locking. Use NewUIRegistry to obtain a registry
// with an initialized map.
type UIRegistry struct {
providers map[TaskType]TaskUIProvider
}
// NewUIRegistry returns an empty UI registry ready to accept providers.
func NewUIRegistry() *UIRegistry {
	reg := new(UIRegistry)
	reg.providers = map[TaskType]TaskUIProvider{}
	return reg
}
// RegisterUI stores provider under the task type it reports, replacing any
// provider previously registered for that type.
func (r *UIRegistry) RegisterUI(provider TaskUIProvider) {
	key := provider.GetTaskType()
	r.providers[key] = provider
}
// GetProvider returns the UI provider for a task type, or nil when none is
// registered for it.
func (r *UIRegistry) GetProvider(taskType TaskType) TaskUIProvider {
return r.providers[taskType]
}
// GetAllProviders returns a new map holding every registered UI provider.
// Modifying the returned map does not affect the registry.
func (r *UIRegistry) GetAllProviders() map[TaskType]TaskUIProvider {
	snapshot := make(map[TaskType]TaskUIProvider, len(r.providers))
	for taskType, provider := range r.providers {
		snapshot[taskType] = provider
	}
	return snapshot
}
// Common UI data structures for shared components

// TaskListData is the payload for a task list view: the tasks, per-type
// statistics, and the time the data was last updated.
type TaskListData struct {
Tasks []*Task `json:"tasks"`
TaskStats []*TaskStats `json:"task_stats"`
LastUpdated time.Time `json:"last_updated"`
}
// TaskDetailsData is the payload for a single task's detail view.
type TaskDetailsData struct {
Task *Task `json:"task"`
TaskType TaskType `json:"task_type"`
DisplayName string `json:"display_name"`
Description string `json:"description"`
Stats *TaskStats `json:"stats"`
// ConfigForm is template.HTML, so html/template emits it without escaping.
ConfigForm template.HTML `json:"config_form"`
LastUpdated time.Time `json:"last_updated"`
}
// Common form field types for simple form building

// FormField describes one input of a generated configuration form.
type FormField struct {
Name string `json:"name"`
Label string `json:"label"`
Type string `json:"type"` // text, number, checkbox, select, duration
// Value's dynamic type depends on Type: the FormBuilder Add* methods store a
// string for text, select and duration fields, a float64 for number fields,
// and a bool for checkbox fields.
Value interface{} `json:"value"`
Description string `json:"description"`
Required bool `json:"required"`
Options []FormOption `json:"options,omitempty"` // For select fields
}
// FormOption is one choice of a select field: Value is the submitted value and
// Label is the text shown to the user.
type FormOption struct {
Value string `json:"value"`
Label string `json:"label"`
}
// FormBuilder is a helper for building forms in code. Fields are collected in
// the order the Add* methods are called and rendered in that order by Build.
type FormBuilder struct {
fields []FormField
}
// NewFormBuilder returns a form builder with no fields.
func NewFormBuilder() *FormBuilder {
	return &FormBuilder{fields: []FormField{}}
}
// AddTextField appends a "text" field with the given string value and returns
// the builder so calls can be chained.
func (fb *FormBuilder) AddTextField(name, label, description string, value string, required bool) *FormBuilder {
	field := FormField{
		Type:        "text",
		Name:        name,
		Label:       label,
		Description: description,
		Value:       value,
		Required:    required,
	}
	fb.fields = append(fb.fields, field)
	return fb
}
// AddNumberField appends a "number" field with the given float64 value and
// returns the builder so calls can be chained.
func (fb *FormBuilder) AddNumberField(name, label, description string, value float64, required bool) *FormBuilder {
	field := FormField{
		Type:        "number",
		Name:        name,
		Label:       label,
		Description: description,
		Value:       value,
		Required:    required,
	}
	fb.fields = append(fb.fields, field)
	return fb
}
// AddCheckboxField appends a "checkbox" field with the given bool value and
// returns the builder so calls can be chained. Checkbox fields are never
// marked required.
func (fb *FormBuilder) AddCheckboxField(name, label, description string, value bool) *FormBuilder {
	field := FormField{
		Type:        "checkbox",
		Name:        name,
		Label:       label,
		Description: description,
		Value:       value,
		Required:    false,
	}
	fb.fields = append(fb.fields, field)
	return fb
}
// AddSelectField appends a "select" dropdown field offering options, with
// value as the current selection, and returns the builder so calls can be
// chained.
func (fb *FormBuilder) AddSelectField(name, label, description string, value string, options []FormOption, required bool) *FormBuilder {
	field := FormField{
		Type:        "select",
		Name:        name,
		Label:       label,
		Description: description,
		Value:       value,
		Options:     options,
		Required:    required,
	}
	fb.fields = append(fb.fields, field)
	return fb
}
// AddDurationField appends a "duration" field and returns the builder so calls
// can be chained. The duration is stored in its time.Duration.String() form.
func (fb *FormBuilder) AddDurationField(name, label, description string, value time.Duration, required bool) *FormBuilder {
	field := FormField{
		Type:        "duration",
		Name:        name,
		Label:       label,
		Description: description,
		Value:       value.String(),
		Required:    required,
	}
	fb.fields = append(fb.fields, field)
	return fb
}
// Build renders every added field, in insertion order, and returns the
// concatenated markup as template.HTML (Bootstrap-styled form groups).
func (fb *FormBuilder) Build() template.HTML {
	var rendered string
	for i := range fb.fields {
		rendered += fb.renderField(fb.fields[i])
	}
	return template.HTML(rendered)
}
// renderField renders a single form field as a Bootstrap "mb-3" block.
//
// Every caller-supplied string (name, label, value, description, option value
// and option label) is HTML-escaped before it is written. Build wraps the
// result in template.HTML, which html/template emits verbatim, so unescaped
// input here would break the markup or allow markup injection.
//
// A Value whose dynamic type does not match the field type no longer panics:
// a non-bool checkbox value renders unchecked and a non-string text, select or
// duration value renders as the empty string.
//
// Field types other than checkbox, text, number, select and duration produce
// only the label and the optional description.
func (fb *FormBuilder) renderField(field FormField) string {
	esc := template.HTMLEscapeString
	name := esc(field.Name)
	label := esc(field.Label)

	// Raw string value: used unescaped for option comparison, escaped for output.
	rawValue, _ := field.Value.(string)
	textValue := esc(rawValue)

	requiredAttr := ""
	if field.Required {
		requiredAttr = " required"
	}

	out := "<div class=\"mb-3\">\n"

	if field.Type == "checkbox" {
		// Checkboxes put the label after the input and ignore Required.
		checked := ""
		if on, ok := field.Value.(bool); ok && on {
			checked = " checked"
		}
		out += " <div class=\"form-check\">\n"
		out += " <input type=\"checkbox\" class=\"form-check-input\" id=\"" + name + "\" name=\"" + name + "\"" + checked + ">\n"
		out += " <label class=\"form-check-label\" for=\"" + name + "\">" + label + "</label>\n"
		out += " </div>\n"
	} else {
		// Label for non-checkbox fields, with a red asterisk when required.
		requiredMark := ""
		if field.Required {
			requiredMark = " <span class=\"text-danger\">*</span>"
		}
		out += " <label for=\"" + name + "\" class=\"form-label\">" + label + requiredMark + "</label>\n"

		// Input element based on the field type.
		switch field.Type {
		case "text":
			out += " <input type=\"text\" class=\"form-control\" id=\"" + name + "\" name=\"" + name + "\" value=\"" + textValue + "\"" + requiredAttr + ">\n"
		case "number":
			out += " <input type=\"number\" class=\"form-control\" id=\"" + name + "\" name=\"" + name + "\" step=\"any\" value=\"" +
				esc(fmt.Sprintf("%v", field.Value)) + "\"" + requiredAttr + ">\n"
		case "select":
			out += " <select class=\"form-select\" id=\"" + name + "\" name=\"" + name + "\"" + requiredAttr + ">\n"
			for _, option := range field.Options {
				selected := ""
				if option.Value == rawValue {
					selected = " selected"
				}
				out += " <option value=\"" + esc(option.Value) + "\"" + selected + ">" + esc(option.Label) + "</option>\n"
			}
			out += " </select>\n"
		case "duration":
			out += " <input type=\"text\" class=\"form-control\" id=\"" + name + "\" name=\"" + name + "\" value=\"" + textValue +
				"\" placeholder=\"e.g., 30m, 2h, 24h\"" + requiredAttr + ">\n"
		}
	}

	// The description block is the same for every field type.
	if field.Description != "" {
		out += " <div class=\"form-text text-muted\">" + esc(field.Description) + "</div>\n"
	}
	out += "</div>\n"
	return out
}

View File

@@ -0,0 +1,63 @@
package types
import (
"github.com/seaweedfs/seaweedfs/weed/admin/view/components"
)
// TaskUITemplProvider defines how a task type provides its configuration UI
// using templ components. Providers are stored in a UITemplRegistry keyed by
// the value returned from GetTaskType.
type TaskUITemplProvider interface {
	// GetTaskType returns the task type this provider belongs to.
	GetTaskType() TaskType
	// GetDisplayName returns the human-readable name of the task type.
	GetDisplayName() string
	// GetDescription returns a description of what this task does.
	GetDescription() string
	// GetIcon returns the icon CSS class or HTML for this task type.
	GetIcon() string
	// RenderConfigSections renders currentConfig as templ section data.
	// The concrete type expected for currentConfig is provider-specific.
	RenderConfigSections(currentConfig interface{}) ([]components.ConfigSectionData, error)
	// ParseConfigForm parses form data (field name -> submitted values) into a
	// configuration value.
	ParseConfigForm(formData map[string][]string) (interface{}, error)
	// GetCurrentConfig returns the current configuration.
	GetCurrentConfig() interface{}
	// ApplyConfig applies the new configuration and reports any failure.
	ApplyConfig(config interface{}) error
}
// UITemplRegistry manages task UI providers that use templ components.
// It is a plain map wrapper and performs no locking of its own.
type UITemplRegistry struct {
	providers map[TaskType]TaskUITemplProvider // one provider per task type
}
// NewUITemplRegistry returns an empty registry for templ-based task UI providers.
func NewUITemplRegistry() *UITemplRegistry {
	registry := new(UITemplRegistry)
	registry.providers = make(map[TaskType]TaskUITemplProvider)
	return registry
}
// RegisterUI stores provider under the task type it reports. Registering a
// second provider for the same task type replaces the earlier one.
func (r *UITemplRegistry) RegisterUI(provider TaskUITemplProvider) {
	taskType := provider.GetTaskType()
	r.providers[taskType] = provider
}
// GetProvider looks up the UI provider registered for taskType.
// It returns nil when no provider has been registered for that type.
func (r *UITemplRegistry) GetProvider(taskType TaskType) TaskUITemplProvider {
	if provider, found := r.providers[taskType]; found {
		return provider
	}
	return nil
}
// GetAllProviders returns a copy of the registered providers keyed by task
// type, so the caller may modify the returned map without affecting the
// registry.
func (r *UITemplRegistry) GetAllProviders() map[TaskType]TaskUITemplProvider {
	snapshot := make(map[TaskType]TaskUITemplProvider, len(r.providers))
	for taskType, provider := range r.providers {
		snapshot[taskType] = provider
	}
	return snapshot
}

View File

@@ -0,0 +1,111 @@
package types
import (
"time"
)
// Worker represents a maintenance worker instance: its identity, the task
// types it can run, its concurrency limit and its most recently known state.
// Fields are serialized to JSON under the names given in the struct tags.
type Worker struct {
	ID string `json:"id"`
	Address string `json:"address"`
	LastHeartbeat time.Time `json:"last_heartbeat"`
	Status string `json:"status"` // one of: active, inactive, busy
	CurrentTask *Task `json:"current_task,omitempty"` // omitted from JSON when nil
	Capabilities []TaskType `json:"capabilities"` // task types this worker can run
	MaxConcurrent int `json:"max_concurrent"` // upper bound on concurrently running tasks
	CurrentLoad int `json:"current_load"` // number of tasks currently running
}
// WorkerStatus represents the current status of a worker: a point-in-time
// snapshot of its load, running tasks and lifetime counters.
type WorkerStatus struct {
	WorkerID string `json:"worker_id"`
	Status string `json:"status"`
	Capabilities []TaskType `json:"capabilities"`
	MaxConcurrent int `json:"max_concurrent"`
	CurrentLoad int `json:"current_load"`
	LastHeartbeat time.Time `json:"last_heartbeat"`
	CurrentTasks []Task `json:"current_tasks"` // copies of the tasks running at snapshot time
	Uptime time.Duration `json:"uptime"` // encoded by encoding/json as integer nanoseconds
	TasksCompleted int `json:"tasks_completed"`
	TasksFailed int `json:"tasks_failed"`
}
// WorkerDetailsData represents detailed worker information: the worker record
// itself together with its current tasks, recent tasks and performance figures.
type WorkerDetailsData struct {
	Worker *Worker `json:"worker"`
	CurrentTasks []*Task `json:"current_tasks"`
	RecentTasks []*Task `json:"recent_tasks"`
	Performance *WorkerPerformance `json:"performance"`
	LastUpdated time.Time `json:"last_updated"` // when this view was produced
}
// WorkerPerformance tracks worker performance metrics.
type WorkerPerformance struct {
	TasksCompleted int `json:"tasks_completed"`
	TasksFailed int `json:"tasks_failed"`
	AverageTaskTime time.Duration `json:"average_task_time"`
	Uptime time.Duration `json:"uptime"`
	SuccessRate float64 `json:"success_rate"` // NOTE(review): the worker package fills this as a percentage (0-100) — confirm other producers agree
}
// RegistryStats represents statistics for the worker registry: worker counts
// by state, task counters and timing information.
type RegistryStats struct {
	TotalWorkers int `json:"total_workers"`
	ActiveWorkers int `json:"active_workers"`
	BusyWorkers int `json:"busy_workers"`
	IdleWorkers int `json:"idle_workers"`
	TotalTasks int `json:"total_tasks"`
	CompletedTasks int `json:"completed_tasks"`
	FailedTasks int `json:"failed_tasks"`
	StartTime time.Time `json:"start_time"`
	Uptime time.Duration `json:"uptime"` // encoded by encoding/json as integer nanoseconds
	LastUpdated time.Time `json:"last_updated"`
}
// WorkerSummary represents a summary of all workers, aggregated by status and
// by capability.
type WorkerSummary struct {
	TotalWorkers int `json:"total_workers"`
	ByStatus map[string]int `json:"by_status"` // worker count per status string
	ByCapability map[TaskType]int `json:"by_capability"` // worker count per task type
	TotalLoad int `json:"total_load"`
	MaxCapacity int `json:"max_capacity"`
}
// WorkerFactory creates worker instances.
type WorkerFactory interface {
	// Create builds a worker from config, or returns an error.
	Create(config WorkerConfig) (WorkerInterface, error)
	// Type returns a string identifying the kind of worker this factory creates.
	Type() string
	// Description returns a human-readable description of the factory.
	Description() string
}
// WorkerInterface defines the interface for all worker implementations:
// lifecycle control, task registration and handling, status reporting and
// runtime configuration setters.
type WorkerInterface interface {
	// ID returns the worker's identifier.
	ID() string
	// Start begins the worker's operation.
	Start() error
	// Stop ends the worker's operation.
	Stop() error
	// RegisterTask associates factory with taskType.
	RegisterTask(taskType TaskType, factory TaskFactory)
	// GetCapabilities returns the task types the worker can run.
	GetCapabilities() []TaskType
	// GetStatus returns a snapshot of the worker's current status.
	GetStatus() WorkerStatus
	// HandleTask hands task to the worker, or returns an error if it cannot be accepted.
	HandleTask(task *Task) error
	// SetCapabilities replaces the task types the worker can run.
	SetCapabilities(capabilities []TaskType)
	// SetMaxConcurrent sets the maximum number of concurrent tasks.
	SetMaxConcurrent(max int)
	// SetHeartbeatInterval sets the heartbeat interval.
	SetHeartbeatInterval(interval time.Duration)
	// SetTaskRequestInterval sets the task request interval.
	SetTaskRequestInterval(interval time.Duration)
}
// TaskFactory creates task instances.
type TaskFactory interface {
	// Create builds a task instance for params, or returns an error.
	Create(params TaskParams) (TaskInterface, error)
	// Capabilities returns the capability names this factory advertises.
	Capabilities() []string
	// Description returns a human-readable description of the factory.
	Description() string
}
// TaskInterface defines the interface for all task implementations.
type TaskInterface interface {
	// Type returns the task's type.
	Type() TaskType
	// Execute runs the task with params and returns an error on failure.
	Execute(params TaskParams) error
	// Validate checks params and returns an error if they are unacceptable.
	Validate(params TaskParams) error
	// EstimateTime returns the expected duration of running the task with params.
	EstimateTime(params TaskParams) time.Duration
	// GetProgress returns the task's progress.
	// NOTE(review): the scale (0-1 vs 0-100) is not visible here — confirm against implementations.
	GetProgress() float64
	// Cancel requests cancellation of the task.
	Cancel() error
}

410
weed/worker/worker.go Normal file
View File

@@ -0,0 +1,410 @@
package worker
import (
"fmt"
"os"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
"github.com/seaweedfs/seaweedfs/weed/worker/types"
// Import task packages to trigger their auto-registration
_ "github.com/seaweedfs/seaweedfs/weed/worker/tasks/balance"
_ "github.com/seaweedfs/seaweedfs/weed/worker/tasks/erasure_coding"
_ "github.com/seaweedfs/seaweedfs/weed/worker/tasks/vacuum"
)
// Worker represents a maintenance worker instance. Once started it sends
// periodic heartbeats to the admin server, periodically requests tasks from
// it, and runs each accepted task in its own goroutine.
type Worker struct {
	id string // generated in NewWorker from hostname and creation time
	config *types.WorkerConfig // capabilities, concurrency limit and loop intervals
	registry *tasks.TaskRegistry // creates task instances by task type
	currentTasks map[string]*types.Task // tasks currently running, keyed by task ID
	adminClient AdminClient // must be set via SetAdminClient before Start
	running bool // set by Start, cleared by Stop
	stopChan chan struct{} // closed by Stop to end the background loops
	mutex sync.RWMutex // held by Start/Stop and around accesses to currentTasks
	startTime time.Time // reset on Start; basis for reported uptime
	tasksCompleted int // tasks whose Execute returned nil
	tasksFailed int // tasks whose Execute returned an error
	heartbeatTicker *time.Ticker // created by heartbeatLoop
	requestTicker *time.Ticker // created by taskRequestLoop
}
// AdminClient defines the interface for communicating with the admin server.
// Worker uses it to connect, register, heartbeat, pull tasks and report
// task results.
type AdminClient interface {
	// Connect establishes the connection to the admin server.
	Connect() error
	// Disconnect closes the connection to the admin server.
	Disconnect() error
	// RegisterWorker announces worker to the admin server.
	RegisterWorker(worker *types.Worker) error
	// SendHeartbeat reports status for the worker identified by workerID.
	SendHeartbeat(workerID string, status *types.WorkerStatus) error
	// RequestTask asks for a task matching capabilities. Worker treats a nil
	// task with a nil error as "nothing to do".
	RequestTask(workerID string, capabilities []types.TaskType) (*types.Task, error)
	// CompleteTask reports the outcome of taskID; errorMsg carries the failure
	// text and is empty on success.
	CompleteTask(taskID string, success bool, errorMsg string) error
	// UpdateTaskProgress reports progress for taskID.
	UpdateTaskProgress(taskID string, progress float64) error
	// IsConnected reports whether the client currently has a connection.
	IsConnected() bool
}
// NewWorker builds a Worker around config, falling back to
// types.DefaultWorkerConfig() when config is nil. The worker ID is always
// generated as "worker-<hostname>-<unix seconds>", and the worker shares the
// global task registry, which already has all task types registered.
// The returned error is always nil.
func NewWorker(config *types.WorkerConfig) (*Worker, error) {
	if config == nil {
		config = types.DefaultWorkerConfig()
	}

	// A failed hostname lookup is ignored and leaves the hostname part empty.
	host, _ := os.Hostname()
	generatedID := fmt.Sprintf("worker-%s-%d", host, time.Now().Unix())

	taskRegistry := tasks.GetGlobalRegistry()

	w := new(Worker)
	w.id = generatedID
	w.config = config
	w.registry = taskRegistry
	w.currentTasks = make(map[string]*types.Task)
	w.stopChan = make(chan struct{})
	w.startTime = time.Now()

	glog.V(1).Infof("Worker created with %d registered task types", len(taskRegistry.GetSupportedTypes()))
	return w, nil
}
// ID returns the worker ID that was generated when the worker was created.
func (w *Worker) ID() string {
	return w.id
}
// Start connects to the admin server, registers this worker and launches the
// heartbeat and task-request loops.
//
// It returns an error when the worker is already running, when no admin client
// has been set, or when connecting or registering fails. Connect and register
// errors are wrapped with %w so callers can inspect them with errors.Is/As.
// If registration fails the connection is closed again and the worker is left
// stopped.
//
// NOTE(review): Stop closes stopChan and nothing recreates it, so the loops
// launched by a Start that follows a Stop return immediately.
func (w *Worker) Start() error {
	w.mutex.Lock()
	defer w.mutex.Unlock()

	if w.running {
		return fmt.Errorf("worker is already running")
	}
	if w.adminClient == nil {
		return fmt.Errorf("admin client is not set")
	}

	// Connect to admin server
	if err := w.adminClient.Connect(); err != nil {
		return fmt.Errorf("failed to connect to admin server: %w", err)
	}

	w.running = true
	w.startTime = time.Now()

	// Register with admin server
	workerInfo := &types.Worker{
		ID:            w.id,
		Capabilities:  w.config.Capabilities,
		MaxConcurrent: w.config.MaxConcurrent,
		Status:        "active",
		CurrentLoad:   0,
		LastHeartbeat: time.Now(),
	}
	if err := w.adminClient.RegisterWorker(workerInfo); err != nil {
		w.running = false
		// Best-effort cleanup: the registration error is the one returned,
		// but a failed disconnect is still worth surfacing in the log.
		if disconnectErr := w.adminClient.Disconnect(); disconnectErr != nil {
			glog.Warningf("Worker %s failed to disconnect after registration failure: %v", w.id, disconnectErr)
		}
		return fmt.Errorf("failed to register worker: %w", err)
	}

	// Start worker loops
	go w.heartbeatLoop()
	go w.taskRequestLoop()

	glog.Infof("Worker %s started", w.id)
	return nil
}
// Stop shuts the worker down: it signals the background loops to exit, waits
// up to 30 seconds for running tasks to finish, then disconnects from the
// admin server. Calling Stop on a worker that is not running is a no-op.
// Stop always returns nil; a disconnect failure is only logged.
//
// The mutex is released before waiting. Finishing tasks need the write lock
// to remove themselves from currentTasks, so waiting with the lock held could
// never observe the task count dropping.
func (w *Worker) Stop() error {
	w.mutex.Lock()
	if !w.running {
		w.mutex.Unlock()
		return nil
	}
	w.running = false
	close(w.stopChan)

	// Stop tickers
	if w.heartbeatTicker != nil {
		w.heartbeatTicker.Stop()
	}
	if w.requestTicker != nil {
		w.requestTicker.Stop()
	}
	adminClient := w.adminClient
	w.mutex.Unlock()

	// inFlight returns the number of tasks still running.
	inFlight := func() int {
		w.mutex.RLock()
		defer w.mutex.RUnlock()
		return len(w.currentTasks)
	}

	// Wait for current tasks to complete or timeout
	timeout := time.NewTimer(30 * time.Second)
	defer timeout.Stop()
	poll := time.NewTicker(time.Second)
	defer poll.Stop()

drain:
	for inFlight() > 0 {
		select {
		case <-timeout.C:
			glog.Warningf("Worker %s stopping with %d tasks still running", w.id, inFlight())
			// A bare break would only leave the select; the label ends the wait.
			break drain
		case <-poll.C:
			// Check again
		}
	}

	// Disconnect from admin server
	if adminClient != nil {
		if err := adminClient.Disconnect(); err != nil {
			glog.Errorf("Error disconnecting from admin server: %v", err)
		}
	}

	glog.Infof("Worker %s stopped", w.id)
	return nil
}
// RegisterTask registers factory for taskType in the worker's task registry.
// NewWorker assigns the global registry, so the registration is not private to
// this worker.
func (w *Worker) RegisterTask(taskType types.TaskType, factory types.TaskFactory) {
	w.registry.Register(taskType, factory)
}
// GetCapabilities returns the worker capabilities as stored in the
// configuration. The slice is returned as-is, not copied.
func (w *Worker) GetCapabilities() []types.TaskType {
	return w.config.Capabilities
}
// GetStatus builds a snapshot of the worker's state under the read lock.
// Status is "busy" once the number of running tasks reaches MaxConcurrent and
// "active" otherwise. CurrentTasks holds copies of the running tasks and is
// nil when none are running.
func (w *Worker) GetStatus() types.WorkerStatus {
	w.mutex.RLock()
	defer w.mutex.RUnlock()

	load := len(w.currentTasks)

	var running []types.Task
	for id := range w.currentTasks {
		running = append(running, *w.currentTasks[id])
	}

	state := "busy"
	if load < w.config.MaxConcurrent {
		state = "active"
	}

	var snapshot types.WorkerStatus
	snapshot.WorkerID = w.id
	snapshot.Status = state
	snapshot.Capabilities = w.config.Capabilities
	snapshot.MaxConcurrent = w.config.MaxConcurrent
	snapshot.CurrentLoad = load
	snapshot.LastHeartbeat = time.Now()
	snapshot.CurrentTasks = running
	snapshot.Uptime = time.Since(w.startTime)
	snapshot.TasksCompleted = w.tasksCompleted
	snapshot.TasksFailed = w.tasksFailed
	return snapshot
}
// HandleTask accepts task for execution unless the worker already runs
// MaxConcurrent tasks, in which case it returns an error. An accepted task is
// recorded in currentTasks and executed asynchronously in its own goroutine.
func (w *Worker) HandleTask(task *types.Task) error {
	w.mutex.Lock()
	atCapacity := len(w.currentTasks) >= w.config.MaxConcurrent
	if !atCapacity {
		w.currentTasks[task.ID] = task
	}
	w.mutex.Unlock()

	if atCapacity {
		return fmt.Errorf("worker is at capacity")
	}

	// Run the task without blocking the caller.
	go w.executeTask(task)
	return nil
}
// SetCapabilities sets the worker capabilities in the configuration.
// The slice is stored as-is (not copied) and the write is not synchronized
// with w.mutex. Start sends the capabilities to the admin server at
// registration time.
func (w *Worker) SetCapabilities(capabilities []types.TaskType) {
	w.config.Capabilities = capabilities
}
// SetMaxConcurrent sets the maximum number of tasks the worker runs
// concurrently. HandleTask and requestTasks compare the running-task count
// against this value. The write is not synchronized with w.mutex.
func (w *Worker) SetMaxConcurrent(max int) {
	w.config.MaxConcurrent = max
}
// SetHeartbeatInterval sets the heartbeat interval in the configuration.
// heartbeatLoop reads the interval once, when it creates its ticker, so a
// change made after Start does not affect an already running loop.
func (w *Worker) SetHeartbeatInterval(interval time.Duration) {
	w.config.HeartbeatInterval = interval
}
// SetTaskRequestInterval sets the task request interval in the configuration.
// taskRequestLoop reads the interval once, when it creates its ticker, so a
// change made after Start does not affect an already running loop.
func (w *Worker) SetTaskRequestInterval(interval time.Duration) {
	w.config.TaskRequestInterval = interval
}
// SetAdminClient sets the admin client used to talk to the admin server.
// It must be called before Start, which fails when no client is set.
func (w *Worker) SetAdminClient(client AdminClient) {
	w.adminClient = client
}
// executeTask runs task to completion: it creates the task instance from the
// registry, executes it, reports the outcome to the admin server and updates
// the completed/failed counters. The task is removed from currentTasks when
// the function returns, whatever the outcome.
//
// Counters are updated under the write lock because several task goroutines
// run concurrently and readers access the counters under the read lock. A task
// that cannot be created is reported to the admin server as failed and is
// therefore also counted as failed.
func (w *Worker) executeTask(task *types.Task) {
	defer func() {
		w.mutex.Lock()
		delete(w.currentTasks, task.ID)
		w.mutex.Unlock()
	}()

	// recordOutcome bumps the matching counter while holding the write lock.
	recordOutcome := func(succeeded bool) {
		w.mutex.Lock()
		defer w.mutex.Unlock()
		if succeeded {
			w.tasksCompleted++
		} else {
			w.tasksFailed++
		}
	}

	glog.Infof("Worker %s executing task %s: %s", w.id, task.ID, task.Type)

	// Create task instance
	taskParams := types.TaskParams{
		VolumeID:   task.VolumeID,
		Server:     task.Server,
		Collection: task.Collection,
		Parameters: task.Parameters,
	}
	taskInstance, err := w.registry.CreateTask(task.Type, taskParams)
	if err != nil {
		w.completeTask(task.ID, false, fmt.Sprintf("failed to create task: %v", err))
		recordOutcome(false)
		return
	}

	// Execute task and report completion
	if err := taskInstance.Execute(taskParams); err != nil {
		w.completeTask(task.ID, false, err.Error())
		recordOutcome(false)
		glog.Errorf("Worker %s failed to execute task %s: %v", w.id, task.ID, err)
		return
	}

	w.completeTask(task.ID, true, "")
	recordOutcome(true)
	glog.Infof("Worker %s completed task %s successfully", w.id, task.ID)
}
// completeTask tells the admin server how taskID ended. It does nothing when
// no admin client is set; a reporting failure is logged and not returned.
func (w *Worker) completeTask(taskID string, success bool, errorMsg string) {
	if w.adminClient == nil {
		return
	}
	reportErr := w.adminClient.CompleteTask(taskID, success, errorMsg)
	if reportErr != nil {
		glog.Errorf("Failed to report task completion: %v", reportErr)
	}
}
// heartbeatLoop sends periodic heartbeats to the admin server, one every
// config.HeartbeatInterval, until stopChan is closed.
//
// The loop selects on a local ticker and publishes it to w.heartbeatTicker
// under the mutex, because Stop reads that field while holding the mutex.
func (w *Worker) heartbeatLoop() {
	ticker := time.NewTicker(w.config.HeartbeatInterval)
	defer ticker.Stop()

	w.mutex.Lock()
	w.heartbeatTicker = ticker
	w.mutex.Unlock()

	for {
		select {
		case <-w.stopChan:
			return
		case <-ticker.C:
			w.sendHeartbeat()
		}
	}
}
// taskRequestLoop periodically requests new tasks from the admin server, once
// every config.TaskRequestInterval, until stopChan is closed.
//
// The loop selects on a local ticker and publishes it to w.requestTicker
// under the mutex, because Stop reads that field while holding the mutex.
func (w *Worker) taskRequestLoop() {
	ticker := time.NewTicker(w.config.TaskRequestInterval)
	defer ticker.Stop()

	w.mutex.Lock()
	w.requestTicker = ticker
	w.mutex.Unlock()

	for {
		select {
		case <-w.stopChan:
			return
		case <-ticker.C:
			w.requestTasks()
		}
	}
}
// sendHeartbeat sends one heartbeat carrying the worker's ID, capabilities,
// concurrency limit and current load to the admin server. It does nothing
// when no admin client is set; a send failure is logged as a warning.
//
// The load is read under the read lock because task goroutines add to and
// remove from currentTasks concurrently.
func (w *Worker) sendHeartbeat() {
	if w.adminClient == nil {
		return
	}

	w.mutex.RLock()
	currentLoad := len(w.currentTasks)
	w.mutex.RUnlock()

	status := &types.WorkerStatus{
		WorkerID:      w.id,
		Status:        "active",
		Capabilities:  w.config.Capabilities,
		MaxConcurrent: w.config.MaxConcurrent,
		CurrentLoad:   currentLoad,
		LastHeartbeat: time.Now(),
	}
	if err := w.adminClient.SendHeartbeat(w.id, status); err != nil {
		glog.Warningf("Failed to send heartbeat: %v", err)
	}
}
// requestTasks asks the admin server for one new task and hands it to
// HandleTask. It returns without asking when the worker already runs
// MaxConcurrent tasks or when no admin client is set. A nil task from the
// admin server means there is nothing to do.
func (w *Worker) requestTasks() {
	w.mutex.RLock()
	running := len(w.currentTasks)
	w.mutex.RUnlock()

	// Already at capacity
	if running >= w.config.MaxConcurrent {
		return
	}
	if w.adminClient == nil {
		return
	}

	assigned, reqErr := w.adminClient.RequestTask(w.id, w.config.Capabilities)
	if reqErr != nil {
		glog.V(2).Infof("Failed to request task: %v", reqErr)
		return
	}
	if assigned == nil {
		return
	}

	if handleErr := w.HandleTask(assigned); handleErr != nil {
		glog.Errorf("Failed to handle task: %v", handleErr)
	}
}
// GetTaskRegistry returns the task registry the worker creates tasks from.
// NewWorker assigns the global registry, so the returned pointer is shared.
func (w *Worker) GetTaskRegistry() *tasks.TaskRegistry {
	return w.registry
}
// GetCurrentTasks returns a copy of the running-task map, keyed by task ID.
// The map itself is a new one, but the *types.Task values are shared with the
// worker.
func (w *Worker) GetCurrentTasks() map[string]*types.Task {
	w.mutex.RLock()
	defer w.mutex.RUnlock()

	snapshot := make(map[string]*types.Task)
	for taskID := range w.currentTasks {
		snapshot[taskID] = w.currentTasks[taskID]
	}
	return snapshot
}
// GetConfig returns the worker configuration. The internal pointer is returned
// directly, so changes made through it affect the worker.
func (w *Worker) GetConfig() *types.WorkerConfig {
	return w.config
}
// GetPerformanceMetrics reports the worker's task counters, uptime since the
// last Start (or creation) and success rate. SuccessRate is a percentage
// (0-100) of completed tasks over completed plus failed tasks, and 0 when no
// task has finished yet. AverageTaskTime is always 0 because task durations
// are not tracked.
func (w *Worker) GetPerformanceMetrics() *types.WorkerPerformance {
	w.mutex.RLock()
	defer w.mutex.RUnlock()

	elapsed := time.Since(w.startTime)
	completed, failed := w.tasksCompleted, w.tasksFailed

	metrics := &types.WorkerPerformance{
		TasksCompleted:  completed,
		TasksFailed:     failed,
		AverageTaskTime: 0, // Would need to track this
		Uptime:          elapsed,
	}
	if finished := completed + failed; finished > 0 {
		metrics.SuccessRate = float64(completed) / float64(finished) * 100
	}
	return metrics
}