Process .ecj deletions during EC decode and vacuum decoded volume (#8863)

* Process .ecj deletions during EC decode and vacuum decoded volume (#8798)

When decoding EC volumes back to normal volumes, deletions recorded in
the .ecj journal were not being applied before computing the dat file
size or checking for live needles. This caused the decoded volume to
include data for deleted files and could produce false positives in the
all-deleted check.

- Call RebuildEcxFile before HasLiveNeedles/FindDatFileSize in
  VolumeEcShardsToVolume so .ecj deletions are merged into .ecx first
  (see the sketch after this list)
- Vacuum the decoded volume after mounting in ec.decode to compact out
  deleted needle data from the .dat file
- Add integration tests for decoding with non-empty .ecj files
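
For illustration, a minimal Go sketch of the ordering described in the first
bullet. The helper names come from this commit message, but their signatures
and the surrounding variables (baseFileName, vid) are assumed here, not copied
from the actual SeaweedFS handler:

    // Sketch only: signatures of RebuildEcxFile, HasLiveNeedles and
    // FindDatFileSize are assumed for illustration.
    // Step 1: fold the .ecj deletion journal into the .ecx index first.
    if err := erasure_coding.RebuildEcxFile(baseFileName); err != nil {
        return fmt.Errorf("rebuild ecx for %s: %w", baseFileName, err)
    }
    // Step 2: only now do these checks see the post-deletion state, so a
    // fully deleted volume is detected correctly and deleted needles no
    // longer inflate the computed .dat size.
    hasLive := erasure_coding.HasLiveNeedles(baseFileName)
    datFileSize, err := erasure_coding.FindDatFileSize(baseFileName)
    if err != nil {
        return fmt.Errorf("find dat file size for %s: %w", baseFileName, err)
    }
    fmt.Printf("volume %d: hasLiveNeedles=%v datFileSize=%d\n", vid, hasLive, datFileSize)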

* storage: add offline volume compaction helper

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* ec: compact decoded volumes before deleting shards

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* ec: address PR review comments

- Fall back to the data directory for .ecx when the idx directory lacks it
  (sketched below)
- Make compaction failure non-fatal during EC decode
- Remove misleading "buffer: 10%" from space check error message
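
A minimal sketch of the first item above, using only the standard library
(os, path/filepath); idxDir, dataDir and baseName are hypothetical variable
names, and the real lookup lives in the volume server's EC decode path:

    // Prefer the .ecx in the configured index directory, but fall back to
    // the data directory when the index directory does not have it.
    ecxPath := filepath.Join(idxDir, baseName+".ecx")
    if _, statErr := os.Stat(ecxPath); os.IsNotExist(statErr) {
        ecxPath = filepath.Join(dataDir, baseName+".ecx")
    }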

* ec: collect .ecj from all shard locations during decode

Each server's .ecj only contains deletions for needles whose data
resides in shards held by that server. Previously, sources with no
new data shards to contribute were skipped entirely, losing their
.ecj deletion entries. Now .ecj is always appended from every shard
location so RebuildEcxFile sees the full set of deletions.
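
In code terms this is the change shown in the collectEcShards hunk below: the
early continue for locations with nothing to copy goes away, and the copy
request always carries the journal. Abridged from that hunk:

    // Issue the copy even when this location contributes no new data
    // shards, so its .ecj deletion journal still reaches the target.
    _, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
        VolumeId:       uint32(vid),
        ShardIds:       needToCopyShardsInfo.IdsUint32(), // may be empty
        CopyEcxFile:    false,
        CopyEcjFile:    true,                             // always merge deletion journals
        CopyVifFile:    needToCopyShardsInfo.Count() > 0, // .vif only needed with real shard data
        SourceDataNode: string(loc),
    })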

* ec: add integration tests for .ecj collection during decode

TestEcDecodePreservesDeletedNeedles: verifies that needles deleted
via VolumeEcBlobDelete are excluded from the decoded volume.

TestEcDecodeCollectsEcjFromPeer: regression test for the fix in
collectEcShards. Deletes a needle only on a peer server that holds
no new data shards, then verifies the deletion survives decode via
.ecj collection.
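
As a rough outline of the second scenario (not the actual test code; the
cluster setup and the writeFile/runShell/deleteOnPeer/readFile helpers are
hypothetical):

    // Hypothetical helpers and flags; only the sequence of steps matters.
    fid := writeFile(t, cluster, payload)          // write while the volume is still normal
    runShell(t, cluster, "ec.encode -volumeId=1")  // convert it to EC shards
    deleteOnPeer(t, peerWithoutNewDataShards, fid) // deletion lands only in that peer's .ecj
    runShell(t, cluster, "ec.decode -volumeId=1")  // decode back to a normal volume
    if resp := readFile(t, cluster, fid); resp.StatusCode != http.StatusNotFound {
        t.Fatalf("deletion should survive decode, got status %d", resp.StatusCode)
    }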

* ec: address review nits in decode and tests

- Remove double error wrapping in mountDecodedVolume
- Check VolumeUnmount error in peer ecj test
- Assert 404 specifically for deleted needles, fail on 5xx
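
The last item amounts to an assertion along these lines (deletedFileUrl is a
placeholder; only net/http and testing are used):

    resp, err := http.Get(deletedFileUrl)
    if err != nil {
        t.Fatal(err)
    }
    defer resp.Body.Close()
    switch {
    case resp.StatusCode == http.StatusNotFound:
        // expected: the needle stayed deleted through the decode
    case resp.StatusCode >= 500:
        t.Fatalf("volume server error reading deleted needle: %d", resp.StatusCode)
    default:
        t.Fatalf("expected 404 for deleted needle, got %d", resp.StatusCode)
    }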

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Author: Chris Lu
Date: 2026-04-01 01:15:26 -07:00
Committed by: GitHub
Parent: 80d3085d54
Commit: af68449a26
7 changed files with 741 additions and 44 deletions


@@ -169,8 +169,14 @@ func doEcDecode(commandEnv *CommandEnv, topoInfo *master_pb.TopologyInfo, collec
 		return fmt.Errorf("generate normal volume %d on %s: %v", vid, targetNodeLocation, err)
 	}
 
+	// mount the decoded volume after server-side offline compaction succeeded
+	err = mountDecodedVolume(commandEnv.option.GrpcDialOption, targetNodeLocation, vid)
+	if err != nil {
+		return fmt.Errorf("mount decoded volume %d on %s: %v", vid, targetNodeLocation, err)
+	}
+
 	// delete the previous ec shards
-	err = mountVolumeAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, targetNodeLocation, nodeToEcShardsInfo, vid)
+	err = unmountAndDeleteEcShardsWithPrefix("deleteDecodedEcShards", commandEnv.option.GrpcDialOption, collection, nodeToEcShardsInfo, vid)
 	if err != nil {
 		return fmt.Errorf("delete ec shards for volume %d: %v", vid, err)
 	}
@@ -219,23 +225,16 @@ func unmountAndDeleteEcShardsWithPrefix(prefix string, grpcDialOption grpc.DialO
 	return ewg.Wait()
 }
 
-func mountVolumeAndDeleteEcShards(grpcDialOption grpc.DialOption, collection string, targetNodeLocation pb.ServerAddress, nodeToShardsInfo map[pb.ServerAddress]*erasure_coding.ShardsInfo, vid needle.VolumeId) error {
-	// mount volume
-	if err := operation.WithVolumeServerClient(false, targetNodeLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+func mountDecodedVolume(grpcDialOption grpc.DialOption, targetNodeLocation pb.ServerAddress, vid needle.VolumeId) error {
+	return operation.WithVolumeServerClient(false, targetNodeLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
 		_, mountErr := volumeServerClient.VolumeMount(context.Background(), &volume_server_pb.VolumeMountRequest{
 			VolumeId: uint32(vid),
 		})
 		return mountErr
-	}); err != nil {
-		return fmt.Errorf("mountVolumeAndDeleteEcShards mount volume %d on %s: %v", vid, targetNodeLocation, err)
-	}
-
-	return unmountAndDeleteEcShardsWithPrefix("mountVolumeAndDeleteEcShards", grpcDialOption, collection, nodeToShardsInfo, vid)
+	})
 }
 
 func generateNormalVolume(grpcDialOption grpc.DialOption, vid needle.VolumeId, collection string, sourceVolumeServer pb.ServerAddress) error {
 	fmt.Printf("generateNormalVolume from ec volume %d on %s\n", vid, sourceVolumeServer)
 	err := operation.WithVolumeServerClient(false, sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
@@ -280,13 +279,18 @@ func collectEcShards(commandEnv *CommandEnv, nodeToShardsInfo map[pb.ServerAddre
 			}
 
 			needToCopyShardsInfo := si.Minus(existingShardsInfo).MinusParityShards()
-			if needToCopyShardsInfo.Count() == 0 {
-				continue
-			}
 
 			err = operation.WithVolumeServerClient(false, targetNodeLocation, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
-				fmt.Printf("copy %d.%v %s => %s\n", vid, needToCopyShardsInfo.Ids(), loc, targetNodeLocation)
+				// Always collect .ecj from every shard location. Each server's .ecj
+				// only contains deletions for needles whose data resides in shards
+				// held by that server. Without merging all .ecj files, deletions
+				// recorded on other servers would be lost during decode.
+				if needToCopyShardsInfo.Count() > 0 {
+					fmt.Printf("copy %d.%v %s => %s\n", vid, needToCopyShardsInfo.Ids(), loc, targetNodeLocation)
+				} else {
+					fmt.Printf("collect ecj %d %s => %s\n", vid, loc, targetNodeLocation)
+				}
 
 				_, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
 					VolumeId: uint32(vid),
@@ -294,21 +298,23 @@ func collectEcShards(commandEnv *CommandEnv, nodeToShardsInfo map[pb.ServerAddre
 					ShardIds:       needToCopyShardsInfo.IdsUint32(),
 					CopyEcxFile:    false,
 					CopyEcjFile:    true,
-					CopyVifFile:    true,
+					CopyVifFile:    needToCopyShardsInfo.Count() > 0,
 					SourceDataNode: string(loc),
 				})
 				if copyErr != nil {
 					return fmt.Errorf("copy %d.%v %s => %s : %v\n", vid, needToCopyShardsInfo.Ids(), loc, targetNodeLocation, copyErr)
 				}
 
-				fmt.Printf("mount %d.%v on %s\n", vid, needToCopyShardsInfo.Ids(), targetNodeLocation)
-				_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
-					VolumeId:   uint32(vid),
-					Collection: collection,
-					ShardIds:   needToCopyShardsInfo.IdsUint32(),
-				})
-				if mountErr != nil {
-					return fmt.Errorf("mount %d.%v on %s : %v\n", vid, needToCopyShardsInfo.Ids(), targetNodeLocation, mountErr)
-				}
+				if needToCopyShardsInfo.Count() > 0 {
+					fmt.Printf("mount %d.%v on %s\n", vid, needToCopyShardsInfo.Ids(), targetNodeLocation)
+					_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
+						VolumeId:   uint32(vid),
+						Collection: collection,
+						ShardIds:   needToCopyShardsInfo.IdsUint32(),
+					})
+					if mountErr != nil {
+						return fmt.Errorf("mount %d.%v on %s : %v\n", vid, needToCopyShardsInfo.Ids(), targetNodeLocation, mountErr)
+					}
+				}
 				return nil