fix(worker): pass compaction revision and file sizes in EC volume copy (#8835)
* fix(worker): pass compaction revision and file sizes in EC volume copy

  The worker EC task was sending CopyFile requests without the current compaction revision (defaulting to 0) and with StopOffset set to math.MaxInt64. After a vacuum compaction this caused the volume server to reject the copy or return stale data. Read the volume file status first and forward the compaction revision and actual file sizes so the copy is consistent with the compacted volume.

* propagate erasure coding task context

* fix(worker): validate volume file status and detect short copies

  Reject zero dat file size from ReadVolumeFileStatus — a zero-sized snapshot would produce 0-byte copies and broken EC shards. After streaming, verify totalBytes matches the expected stopOffset and return an error on short copies instead of logging success.

* fix(worker): reject zero idx file size in volume status validation

  A non-empty dat with zero idx indicates an empty or corrupt volume. Without this guard, copyFileFromSource gets stopOffset=0, produces a 0-byte .idx, passes the short-copy check, and generateEcShardsLocally runs against a volume with no index.

* fix fake plugin volume file status

* fix plugin volume balance test fixtures
This commit is contained in:
@@ -11,9 +11,11 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/operation"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
)
|
||||
|
||||
// VolumeServer provides a minimal volume server for erasure coding tests.
|
||||
@@ -196,12 +198,25 @@ func (v *VolumeServer) CopyFile(req *volume_server_pb.CopyFileRequest, stream vo
|
||||
defer file.Close()
|
||||
|
||||
buf := make([]byte, 64*1024)
|
||||
remaining := int64(req.GetStopOffset())
|
||||
for {
|
||||
n, readErr := file.Read(buf)
|
||||
if remaining == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
readBuf := buf
|
||||
if remaining > 0 && remaining < int64(len(buf)) {
|
||||
readBuf = buf[:remaining]
|
||||
}
|
||||
|
||||
n, readErr := file.Read(readBuf)
|
||||
if n > 0 {
|
||||
if err := stream.Send(&volume_server_pb.CopyFileResponse{FileContent: buf[:n]}); err != nil {
|
||||
if err := stream.Send(&volume_server_pb.CopyFileResponse{FileContent: readBuf[:n]}); err != nil {
|
||||
return err
|
||||
}
|
||||
if remaining > 0 {
|
||||
remaining -= int64(n)
|
||||
}
|
||||
}
|
||||
if readErr == io.EOF {
|
||||
break
|
||||
@@ -307,10 +322,21 @@ func (v *VolumeServer) ReadVolumeFileStatus(ctx context.Context, req *volume_ser
|
||||
v.mu.Lock()
|
||||
v.readFileStatusCalls++
|
||||
v.mu.Unlock()
|
||||
|
||||
datInfo, err := os.Stat(v.filePath(req.VolumeId, ".dat"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
idxInfo, err := os.Stat(v.filePath(req.VolumeId, ".idx"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &volume_server_pb.ReadVolumeFileStatusResponse{
|
||||
VolumeId: req.VolumeId,
|
||||
DatFileSize: 1024,
|
||||
IdxFileSize: 16,
|
||||
DatFileSize: uint64(datInfo.Size()),
|
||||
IdxFileSize: uint64(idxInfo.Size()),
|
||||
FileCount: 1,
|
||||
}, nil
|
||||
}
|
||||
@@ -349,7 +375,27 @@ func (v *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, strea
|
||||
v.volumeCopyCalls++
|
||||
v.mu.Unlock()
|
||||
|
||||
if err := stream.Send(&volume_server_pb.VolumeCopyResponse{ProcessedBytes: 1024}); err != nil {
|
||||
dialOption := grpc.WithTransportCredentials(insecure.NewCredentials())
|
||||
var statusResp *volume_server_pb.ReadVolumeFileStatusResponse
|
||||
if err := operation.WithVolumeServerClient(false, pb.ServerAddress(req.SourceDataNode), dialOption,
|
||||
func(client volume_server_pb.VolumeServerClient) error {
|
||||
var readErr error
|
||||
statusResp, readErr = client.ReadVolumeFileStatus(stream.Context(), &volume_server_pb.ReadVolumeFileStatusRequest{
|
||||
VolumeId: req.VolumeId,
|
||||
})
|
||||
return readErr
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := v.copyRemoteFile(stream.Context(), req.SourceDataNode, req.VolumeId, ".dat", statusResp.DatFileSize, dialOption); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.copyRemoteFile(stream.Context(), req.SourceDataNode, req.VolumeId, ".idx", statusResp.IdxFileSize, dialOption); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := stream.Send(&volume_server_pb.VolumeCopyResponse{ProcessedBytes: int64(statusResp.DatFileSize + statusResp.IdxFileSize)}); err != nil {
|
||||
return err
|
||||
}
|
||||
return stream.Send(&volume_server_pb.VolumeCopyResponse{LastAppendAtNs: uint64(time.Now().UnixNano())})
|
||||
@@ -368,3 +414,44 @@ func (v *VolumeServer) VolumeTailReceiver(ctx context.Context, req *volume_serve
|
||||
v.mu.Unlock()
|
||||
return &volume_server_pb.VolumeTailReceiverResponse{}, nil
|
||||
}
|
||||
|
||||
func (v *VolumeServer) copyRemoteFile(ctx context.Context, sourceDataNode string, volumeID uint32, ext string, fileSize uint64, dialOption grpc.DialOption) error {
|
||||
path := v.filePath(volumeID, ext)
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
file, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
return operation.WithVolumeServerClient(true, pb.ServerAddress(sourceDataNode), dialOption,
|
||||
func(client volume_server_pb.VolumeServerClient) error {
|
||||
stream, err := client.CopyFile(ctx, &volume_server_pb.CopyFileRequest{
|
||||
VolumeId: volumeID,
|
||||
Ext: ext,
|
||||
StopOffset: fileSize,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
resp, recvErr := stream.Recv()
|
||||
if recvErr == io.EOF {
|
||||
return nil
|
||||
}
|
||||
if recvErr != nil {
|
||||
return recvErr
|
||||
}
|
||||
if len(resp.FileContent) == 0 {
|
||||
continue
|
||||
}
|
||||
if _, err := file.Write(resp.FileContent); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user