package balance

import (
	"context"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"github.com/seaweedfs/seaweedfs/weed/worker/types"
	"github.com/seaweedfs/seaweedfs/weed/worker/types/base"
	"google.golang.org/grpc"
)

// BalanceTask implements the Task interface. It moves one volume from a
// source volume server to a destination volume server.
type BalanceTask struct {
	*base.BaseTask
	server         string
	volumeID       uint32
	collection     string
	progress       float64
	grpcDialOption grpc.DialOption

	// progressMu guards progress: Execute writes it and GetProgress reads it,
	// possibly from different goroutines.
	progressMu sync.Mutex
}

// NewBalanceTask creates a new balance task instance for the given volume.
// server is the source server that Validate expects to find among the task's
// sources; grpcDialOption is used for every volume-server RPC.
func NewBalanceTask(id string, server string, volumeID uint32, collection string, grpcDialOption grpc.DialOption) *BalanceTask {
	return &BalanceTask{
		BaseTask:       base.NewBaseTask(id, types.TaskTypeBalance),
		server:         server,
		volumeID:       volumeID,
		collection:     collection,
		grpcDialOption: grpcDialOption,
	}
}

// recordProgress stores the latest progress value so GetProgress can return
// it, then forwards the value to the embedded BaseTask's ReportProgress.
func (t *BalanceTask) recordProgress(pct float64) {
	t.progressMu.Lock()
	t.progress = pct
	t.progressMu.Unlock()
	t.ReportProgress(pct)
}

// Execute implements the Task interface. It moves the task's volume from
// params.Sources[0] to params.Targets[0] in six steps:
//
//  1. mark the volume readonly on the source,
//  2. read the source volume's file status,
//  3. copy the volume to the target,
//  4. tail the source for updates since the copy (failure is only logged),
//  5. verify the target's .dat size, file count and .idx size match the source,
//  6. delete the volume from the source.
//
// If any step after (1) fails, a deferred cleanup marks the source volume
// writable again. Returned errors wrap the underlying cause with %w.
func (t *BalanceTask) Execute(ctx context.Context, params *worker_pb.TaskParams) error {
	if params == nil {
		return fmt.Errorf("task parameters are required")
	}
	if params.GetBalanceParams() == nil {
		return fmt.Errorf("balance parameters are required")
	}

	// Get source and destination from unified arrays
	if len(params.Sources) == 0 {
		return fmt.Errorf("source is required for balance task")
	}
	if len(params.Targets) == 0 {
		return fmt.Errorf("target is required for balance task")
	}

	// Use the generated getters so a nil entry yields "" (and the errors
	// below) instead of a nil-pointer panic.
	sourceNode := params.Sources[0].GetNode()
	destNode := params.Targets[0].GetNode()
	if sourceNode == "" {
		return fmt.Errorf("source node is required for balance task")
	}
	if destNode == "" {
		return fmt.Errorf("destination node is required for balance task")
	}
	// Step 6 deletes the volume from the source server; if that is also the
	// destination, the freshly copied volume would be the one deleted.
	if sourceNode == destNode {
		return fmt.Errorf("source and destination node are the same (%s), refusing to move volume %d", sourceNode, t.volumeID)
	}

	t.GetLogger().WithFields(map[string]interface{}{
		"volume_id":   t.volumeID,
		"source":      sourceNode,
		"destination": destNode,
		"collection":  t.collection,
	}).Info("Starting balance task - moving volume")

	sourceServer := pb.ServerAddress(sourceNode)
	targetServer := pb.ServerAddress(destNode)
	volumeId := needle.VolumeId(t.volumeID)

	// Step 1: Mark volume readonly
	t.recordProgress(10.0)
	t.GetLogger().Info("Marking volume readonly for move")
	if err := t.markVolumeReadonly(ctx, sourceServer, volumeId); err != nil {
		return fmt.Errorf("failed to mark volume readonly: %w", err)
	}

	// Restore source writability if any subsequent step fails, so the
	// source volume is not left permanently readonly on abort.
	// NOTE(review): this restores writability even if the volume was already
	// readonly before the task started — confirm that is acceptable.
	sourceMarkedReadonly := true
	defer func() {
		if !sourceMarkedReadonly {
			return
		}
		// Use a fresh context: ctx may already be cancelled when we get here.
		cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cleanupCancel()
		if wErr := t.markVolumeWritable(cleanupCtx, sourceServer, volumeId); wErr != nil {
			glog.Warningf("failed to restore volume %d writability on %s: %v", volumeId, sourceServer, wErr)
		}
	}()

	// Step 2: Read source volume size before copy (for post-copy verification)
	t.recordProgress(15.0)
	sourceStatus, err := t.readVolumeFileStatus(ctx, sourceServer, volumeId)
	if err != nil {
		return fmt.Errorf("failed to read source volume status: %w", err)
	}

	// Step 3: Copy volume to destination (VolumeCopy also mounts the volume)
	t.recordProgress(20.0)
	t.GetLogger().Info("Copying volume to destination")
	lastAppendAtNs, err := t.copyVolume(ctx, sourceServer, targetServer, volumeId)
	if err != nil {
		return fmt.Errorf("failed to copy volume: %w", err)
	}

	// Step 4: Tail for updates. A failure here is tolerated because the
	// verification in step 5 decides whether the move may proceed.
	t.recordProgress(70.0)
	t.GetLogger().Info("Syncing final updates")
	if err := t.tailVolume(ctx, sourceServer, targetServer, volumeId, lastAppendAtNs); err != nil {
		glog.Warningf("Tail operation failed (may be normal): %v", err)
	}

	// Step 5: Verify the volume on target before deleting source.
	// This is a critical safety check — once the source is deleted, data loss
	// is irreversible. We verify the target has the volume with matching size.
	t.recordProgress(85.0)
	t.GetLogger().Info("Verifying volume on target before deleting source")
	targetStatus, err := t.readVolumeFileStatus(ctx, targetServer, volumeId)
	if err != nil {
		return fmt.Errorf("aborting: cannot verify volume %d on target %s before deleting source: %w", volumeId, targetServer, err)
	}
	if targetStatus.DatFileSize != sourceStatus.DatFileSize {
		return fmt.Errorf("aborting: volume %d .dat size mismatch — source %d bytes, target %d bytes",
			volumeId, sourceStatus.DatFileSize, targetStatus.DatFileSize)
	}
	if targetStatus.FileCount != sourceStatus.FileCount {
		return fmt.Errorf("aborting: volume %d file count mismatch — source %d, target %d",
			volumeId, sourceStatus.FileCount, targetStatus.FileCount)
	}
	if targetStatus.IdxFileSize != sourceStatus.IdxFileSize {
		return fmt.Errorf("aborting: volume %d .idx size mismatch — source %d bytes, target %d bytes",
			volumeId, sourceStatus.IdxFileSize, targetStatus.IdxFileSize)
	}

	// Step 6: Delete from source — after this, the move is committed.
	// Clear the readonly flag so the defer doesn't try to restore writability.
	t.recordProgress(90.0)
	t.GetLogger().Info("Deleting volume from source server")
	if err := t.deleteVolume(ctx, sourceServer, volumeId); err != nil {
		return fmt.Errorf("failed to delete volume from source: %w", err)
	}
	sourceMarkedReadonly = false

	t.recordProgress(100.0)
	glog.Infof("Balance task completed successfully: volume %d moved from %s to %s",
		t.volumeID, sourceNode, destNode)
	return nil
}

// Validate implements the UnifiedTask interface. It checks that params carries
// balance parameters, that params.VolumeId equals the task's volume ID, and
// that at least one entry in params.Sources names the task's server.
func (t *BalanceTask) Validate(params *worker_pb.TaskParams) error {
	if params == nil {
		return fmt.Errorf("task parameters are required")
	}
	if params.GetBalanceParams() == nil {
		return fmt.Errorf("balance parameters are required")
	}

	if params.VolumeId != t.volumeID {
		return fmt.Errorf("volume ID mismatch: expected %d, got %d", t.volumeID, params.VolumeId)
	}

	// Validate that at least one source matches our server
	for _, source := range params.Sources {
		// GetNode is nil-safe, so a nil entry is simply skipped.
		if source.GetNode() == t.server {
			return nil
		}
	}
	return fmt.Errorf("no source matches expected server %s", t.server)
}

// EstimateTime implements the UnifiedTask interface. It returns a fixed
// 14-second estimate; params is not consulted.
func (t *BalanceTask) EstimateTime(params *worker_pb.TaskParams) time.Duration {
	return 14 * time.Second
}

// GetProgress returns the progress value most recently recorded by Execute
// (0 before Execute has reported anything).
func (t *BalanceTask) GetProgress() float64 {
	t.progressMu.Lock()
	defer t.progressMu.Unlock()
	return t.progress
}

// Helper methods for real balance operations

// markVolumeReadonly marks the volume readonly on the source server.
func (t *BalanceTask) markVolumeReadonly(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) error { return operation.WithVolumeServerClient(false, server, t.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, err := client.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{ VolumeId: uint32(volumeId), }) return err }) } // markVolumeWritable restores the volume to writable on the source server. func (t *BalanceTask) markVolumeWritable(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) error { return operation.WithVolumeServerClient(false, server, t.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, err := client.VolumeMarkWritable(ctx, &volume_server_pb.VolumeMarkWritableRequest{ VolumeId: uint32(volumeId), }) return err }) } // copyVolume copies volume from source to target server. func (t *BalanceTask) copyVolume(ctx context.Context, sourceServer, targetServer pb.ServerAddress, volumeId needle.VolumeId) (uint64, error) { var lastAppendAtNs uint64 err := operation.WithVolumeServerClient(true, targetServer, t.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { stream, err := client.VolumeCopy(ctx, &volume_server_pb.VolumeCopyRequest{ VolumeId: uint32(volumeId), SourceDataNode: string(sourceServer), }) if err != nil { return err } for { resp, recvErr := stream.Recv() if recvErr != nil { if recvErr == io.EOF { break } return recvErr } if resp.LastAppendAtNs != 0 { lastAppendAtNs = resp.LastAppendAtNs } else { // Report copy progress glog.V(1).Infof("Volume %d copy progress: %s", volumeId, util.BytesToHumanReadable(uint64(resp.ProcessedBytes))) } } return nil }) return lastAppendAtNs, err } // tailVolume syncs remaining updates from source to target. 
func (t *BalanceTask) tailVolume(ctx context.Context, sourceServer, targetServer pb.ServerAddress, volumeId needle.VolumeId, sinceNs uint64) error { return operation.WithVolumeServerClient(true, targetServer, t.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, err := client.VolumeTailReceiver(ctx, &volume_server_pb.VolumeTailReceiverRequest{ VolumeId: uint32(volumeId), SinceNs: sinceNs, IdleTimeoutSeconds: 60, // 1 minute timeout SourceVolumeServer: string(sourceServer), }) return err }) } // readVolumeFileStatus reads the volume's file status (sizes, file count) from a server. func (t *BalanceTask) readVolumeFileStatus(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) (*volume_server_pb.ReadVolumeFileStatusResponse, error) { var resp *volume_server_pb.ReadVolumeFileStatusResponse err := operation.WithVolumeServerClient(false, server, t.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { var err error resp, err = client.ReadVolumeFileStatus(ctx, &volume_server_pb.ReadVolumeFileStatusRequest{ VolumeId: uint32(volumeId), }) return err }) return resp, err } // deleteVolume deletes the volume from the server. func (t *BalanceTask) deleteVolume(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) error { return operation.WithVolumeServerClient(false, server, t.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, err := client.VolumeDelete(ctx, &volume_server_pb.VolumeDeleteRequest{ VolumeId: uint32(volumeId), OnlyEmpty: false, }) return err }) }