The verification step checked DatFileSize and FileCount but not IdxFileSize, leaving a gap in the copy validation before source deletion.
303 lines
10 KiB
Go
303 lines
10 KiB
Go
package balance
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/operation"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
"github.com/seaweedfs/seaweedfs/weed/worker/types"
|
|
"github.com/seaweedfs/seaweedfs/weed/worker/types/base"
|
|
"google.golang.org/grpc"
|
|
)
|
|
|
|
// BalanceTask implements the Task interface
|
|
type BalanceTask struct {
|
|
*base.BaseTask
|
|
server string
|
|
volumeID uint32
|
|
collection string
|
|
progress float64
|
|
grpcDialOption grpc.DialOption
|
|
}
|
|
|
|
// NewBalanceTask creates a new balance task instance
|
|
func NewBalanceTask(id string, server string, volumeID uint32, collection string, grpcDialOption grpc.DialOption) *BalanceTask {
|
|
return &BalanceTask{
|
|
BaseTask: base.NewBaseTask(id, types.TaskTypeBalance),
|
|
server: server,
|
|
volumeID: volumeID,
|
|
collection: collection,
|
|
grpcDialOption: grpcDialOption,
|
|
}
|
|
}
|
|
|
|
// Execute implements the Task interface
|
|
func (t *BalanceTask) Execute(ctx context.Context, params *worker_pb.TaskParams) error {
|
|
if params == nil {
|
|
return fmt.Errorf("task parameters are required")
|
|
}
|
|
|
|
balanceParams := params.GetBalanceParams()
|
|
if balanceParams == nil {
|
|
return fmt.Errorf("balance parameters are required")
|
|
}
|
|
|
|
// Get source and destination from unified arrays
|
|
if len(params.Sources) == 0 {
|
|
return fmt.Errorf("source is required for balance task")
|
|
}
|
|
if len(params.Targets) == 0 {
|
|
return fmt.Errorf("target is required for balance task")
|
|
}
|
|
|
|
sourceNode := params.Sources[0].Node
|
|
destNode := params.Targets[0].Node
|
|
|
|
if sourceNode == "" {
|
|
return fmt.Errorf("source node is required for balance task")
|
|
}
|
|
if destNode == "" {
|
|
return fmt.Errorf("destination node is required for balance task")
|
|
}
|
|
|
|
t.GetLogger().WithFields(map[string]interface{}{
|
|
"volume_id": t.volumeID,
|
|
"source": sourceNode,
|
|
"destination": destNode,
|
|
"collection": t.collection,
|
|
}).Info("Starting balance task - moving volume")
|
|
|
|
sourceServer := pb.ServerAddress(sourceNode)
|
|
targetServer := pb.ServerAddress(destNode)
|
|
volumeId := needle.VolumeId(t.volumeID)
|
|
|
|
// Step 1: Mark volume readonly
|
|
t.ReportProgress(10.0)
|
|
t.GetLogger().Info("Marking volume readonly for move")
|
|
if err := t.markVolumeReadonly(ctx, sourceServer, volumeId); err != nil {
|
|
return fmt.Errorf("failed to mark volume readonly: %v", err)
|
|
}
|
|
// Restore source writability if any subsequent step fails, so the
|
|
// source volume is not left permanently readonly on abort.
|
|
sourceMarkedReadonly := true
|
|
defer func() {
|
|
if sourceMarkedReadonly {
|
|
cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second)
|
|
defer cleanupCancel()
|
|
if wErr := t.markVolumeWritable(cleanupCtx, sourceServer, volumeId); wErr != nil {
|
|
glog.Warningf("failed to restore volume %d writability on %s: %v", volumeId, sourceServer, wErr)
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Step 2: Read source volume size before copy (for post-copy verification)
|
|
t.ReportProgress(15.0)
|
|
sourceStatus, err := t.readVolumeFileStatus(ctx, sourceServer, volumeId)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read source volume status: %v", err)
|
|
}
|
|
|
|
// Step 3: Copy volume to destination (VolumeCopy also mounts the volume)
|
|
t.ReportProgress(20.0)
|
|
t.GetLogger().Info("Copying volume to destination")
|
|
lastAppendAtNs, err := t.copyVolume(ctx, sourceServer, targetServer, volumeId)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to copy volume: %v", err)
|
|
}
|
|
|
|
// Step 4: Tail for updates
|
|
t.ReportProgress(70.0)
|
|
t.GetLogger().Info("Syncing final updates")
|
|
if err := t.tailVolume(ctx, sourceServer, targetServer, volumeId, lastAppendAtNs); err != nil {
|
|
glog.Warningf("Tail operation failed (may be normal): %v", err)
|
|
}
|
|
|
|
// Step 5: Verify the volume on target before deleting source.
|
|
// This is a critical safety check — once the source is deleted, data loss
|
|
// is irreversible. We verify the target has the volume with matching size.
|
|
t.ReportProgress(85.0)
|
|
t.GetLogger().Info("Verifying volume on target before deleting source")
|
|
targetStatus, err := t.readVolumeFileStatus(ctx, targetServer, volumeId)
|
|
if err != nil {
|
|
return fmt.Errorf("aborting: cannot verify volume %d on target %s before deleting source: %v", volumeId, targetServer, err)
|
|
}
|
|
if targetStatus.DatFileSize != sourceStatus.DatFileSize {
|
|
return fmt.Errorf("aborting: volume %d .dat size mismatch — source %d bytes, target %d bytes",
|
|
volumeId, sourceStatus.DatFileSize, targetStatus.DatFileSize)
|
|
}
|
|
if targetStatus.FileCount != sourceStatus.FileCount {
|
|
return fmt.Errorf("aborting: volume %d file count mismatch — source %d, target %d",
|
|
volumeId, sourceStatus.FileCount, targetStatus.FileCount)
|
|
}
|
|
if targetStatus.IdxFileSize != sourceStatus.IdxFileSize {
|
|
return fmt.Errorf("aborting: volume %d .idx size mismatch — source %d bytes, target %d bytes",
|
|
volumeId, sourceStatus.IdxFileSize, targetStatus.IdxFileSize)
|
|
}
|
|
|
|
// Step 6: Delete from source — after this, the move is committed.
|
|
// Clear the readonly flag so the defer doesn't try to restore writability.
|
|
t.ReportProgress(90.0)
|
|
t.GetLogger().Info("Deleting volume from source server")
|
|
if err := t.deleteVolume(ctx, sourceServer, volumeId); err != nil {
|
|
return fmt.Errorf("failed to delete volume from source: %v", err)
|
|
}
|
|
sourceMarkedReadonly = false
|
|
|
|
t.ReportProgress(100.0)
|
|
glog.Infof("Balance task completed successfully: volume %d moved from %s to %s",
|
|
t.volumeID, sourceNode, destNode)
|
|
return nil
|
|
}
|
|
|
|
// Validate implements the UnifiedTask interface
|
|
func (t *BalanceTask) Validate(params *worker_pb.TaskParams) error {
|
|
if params == nil {
|
|
return fmt.Errorf("task parameters are required")
|
|
}
|
|
|
|
balanceParams := params.GetBalanceParams()
|
|
if balanceParams == nil {
|
|
return fmt.Errorf("balance parameters are required")
|
|
}
|
|
|
|
if params.VolumeId != t.volumeID {
|
|
return fmt.Errorf("volume ID mismatch: expected %d, got %d", t.volumeID, params.VolumeId)
|
|
}
|
|
|
|
// Validate that at least one source matches our server
|
|
found := false
|
|
for _, source := range params.Sources {
|
|
if source.Node == t.server {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
return fmt.Errorf("no source matches expected server %s", t.server)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// EstimateTime implements the UnifiedTask interface
|
|
func (t *BalanceTask) EstimateTime(params *worker_pb.TaskParams) time.Duration {
|
|
// Basic estimate based on simulated steps
|
|
return 14 * time.Second // Sum of all step durations
|
|
}
|
|
|
|
// GetProgress returns current progress
|
|
func (t *BalanceTask) GetProgress() float64 {
|
|
return t.progress
|
|
}
|
|
|
|
// Helper methods for real balance operations
|
|
|
|
// markVolumeReadonly marks the volume readonly on the source server.
|
|
func (t *BalanceTask) markVolumeReadonly(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) error {
|
|
return operation.WithVolumeServerClient(false, server, t.grpcDialOption,
|
|
func(client volume_server_pb.VolumeServerClient) error {
|
|
_, err := client.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{
|
|
VolumeId: uint32(volumeId),
|
|
})
|
|
return err
|
|
})
|
|
}
|
|
|
|
// markVolumeWritable restores the volume to writable on the source server.
|
|
func (t *BalanceTask) markVolumeWritable(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) error {
|
|
return operation.WithVolumeServerClient(false, server, t.grpcDialOption,
|
|
func(client volume_server_pb.VolumeServerClient) error {
|
|
_, err := client.VolumeMarkWritable(ctx, &volume_server_pb.VolumeMarkWritableRequest{
|
|
VolumeId: uint32(volumeId),
|
|
})
|
|
return err
|
|
})
|
|
}
|
|
|
|
// copyVolume copies volume from source to target server.
|
|
func (t *BalanceTask) copyVolume(ctx context.Context, sourceServer, targetServer pb.ServerAddress, volumeId needle.VolumeId) (uint64, error) {
|
|
var lastAppendAtNs uint64
|
|
|
|
err := operation.WithVolumeServerClient(true, targetServer, t.grpcDialOption,
|
|
func(client volume_server_pb.VolumeServerClient) error {
|
|
stream, err := client.VolumeCopy(ctx, &volume_server_pb.VolumeCopyRequest{
|
|
VolumeId: uint32(volumeId),
|
|
SourceDataNode: string(sourceServer),
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for {
|
|
resp, recvErr := stream.Recv()
|
|
if recvErr != nil {
|
|
if recvErr == io.EOF {
|
|
break
|
|
}
|
|
return recvErr
|
|
}
|
|
|
|
if resp.LastAppendAtNs != 0 {
|
|
lastAppendAtNs = resp.LastAppendAtNs
|
|
} else {
|
|
// Report copy progress
|
|
glog.V(1).Infof("Volume %d copy progress: %s", volumeId,
|
|
util.BytesToHumanReadable(uint64(resp.ProcessedBytes)))
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
return lastAppendAtNs, err
|
|
}
|
|
|
|
// tailVolume syncs remaining updates from source to target.
|
|
func (t *BalanceTask) tailVolume(ctx context.Context, sourceServer, targetServer pb.ServerAddress, volumeId needle.VolumeId, sinceNs uint64) error {
|
|
return operation.WithVolumeServerClient(true, targetServer, t.grpcDialOption,
|
|
func(client volume_server_pb.VolumeServerClient) error {
|
|
_, err := client.VolumeTailReceiver(ctx, &volume_server_pb.VolumeTailReceiverRequest{
|
|
VolumeId: uint32(volumeId),
|
|
SinceNs: sinceNs,
|
|
IdleTimeoutSeconds: 60, // 1 minute timeout
|
|
SourceVolumeServer: string(sourceServer),
|
|
})
|
|
return err
|
|
})
|
|
}
|
|
|
|
// readVolumeFileStatus reads the volume's file status (sizes, file count) from a server.
|
|
func (t *BalanceTask) readVolumeFileStatus(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) (*volume_server_pb.ReadVolumeFileStatusResponse, error) {
|
|
var resp *volume_server_pb.ReadVolumeFileStatusResponse
|
|
err := operation.WithVolumeServerClient(false, server, t.grpcDialOption,
|
|
func(client volume_server_pb.VolumeServerClient) error {
|
|
var err error
|
|
resp, err = client.ReadVolumeFileStatus(ctx,
|
|
&volume_server_pb.ReadVolumeFileStatusRequest{
|
|
VolumeId: uint32(volumeId),
|
|
})
|
|
return err
|
|
})
|
|
return resp, err
|
|
}
|
|
|
|
// deleteVolume deletes the volume from the server.
|
|
func (t *BalanceTask) deleteVolume(ctx context.Context, server pb.ServerAddress, volumeId needle.VolumeId) error {
|
|
return operation.WithVolumeServerClient(false, server, t.grpcDialOption,
|
|
func(client volume_server_pb.VolumeServerClient) error {
|
|
_, err := client.VolumeDelete(ctx, &volume_server_pb.VolumeDeleteRequest{
|
|
VolumeId: uint32(volumeId),
|
|
OnlyEmpty: false,
|
|
})
|
|
return err
|
|
})
|
|
}
|