Update store_ec_recovery_test.go
This commit is contained in:
@@ -26,9 +26,9 @@ func TestRecoverOneRemoteEcShardInterval_SufficientShards(t *testing.T) {
|
|||||||
// This test simulates the improved diagnostics when there are sufficient shards
|
// This test simulates the improved diagnostics when there are sufficient shards
|
||||||
// We can't easily test the full recovery without mocking the network calls,
|
// We can't easily test the full recovery without mocking the network calls,
|
||||||
// but we can validate the logic for counting available shards
|
// but we can validate the logic for counting available shards
|
||||||
|
|
||||||
shardIdToRecover := erasure_coding.ShardId(5)
|
shardIdToRecover := erasure_coding.ShardId(5)
|
||||||
|
|
||||||
// Create shard locations with all shards except the one to recover
|
// Create shard locations with all shards except the one to recover
|
||||||
shardLocations := make(map[erasure_coding.ShardId][]pb.ServerAddress)
|
shardLocations := make(map[erasure_coding.ShardId][]pb.ServerAddress)
|
||||||
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
|
||||||
@@ -36,7 +36,7 @@ func TestRecoverOneRemoteEcShardInterval_SufficientShards(t *testing.T) {
|
|||||||
shardLocations[erasure_coding.ShardId(i)] = []pb.ServerAddress{"localhost:8080"}
|
shardLocations[erasure_coding.ShardId(i)] = []pb.ServerAddress{"localhost:8080"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify we have enough shards for recovery
|
// Verify we have enough shards for recovery
|
||||||
availableCount := 0
|
availableCount := 0
|
||||||
for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
|
for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
|
||||||
@@ -44,18 +44,18 @@ func TestRecoverOneRemoteEcShardInterval_SufficientShards(t *testing.T) {
|
|||||||
availableCount++
|
availableCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if availableCount < erasure_coding.DataShardsCount {
|
if availableCount < erasure_coding.DataShardsCount {
|
||||||
t.Errorf("Expected at least %d shards, got %d", erasure_coding.DataShardsCount, availableCount)
|
t.Errorf("Expected at least %d shards, got %d", erasure_coding.DataShardsCount, availableCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Logf("Successfully identified %d available shards (need %d)", availableCount, erasure_coding.DataShardsCount)
|
t.Logf("Successfully identified %d available shards (need %d)", availableCount, erasure_coding.DataShardsCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestRecoverOneRemoteEcShardInterval_InsufficientShards tests recovery failure with too few shards
|
// TestRecoverOneRemoteEcShardInterval_InsufficientShards tests recovery failure with too few shards
|
||||||
func TestRecoverOneRemoteEcShardInterval_InsufficientShards(t *testing.T) {
|
func TestRecoverOneRemoteEcShardInterval_InsufficientShards(t *testing.T) {
|
||||||
shardIdToRecover := erasure_coding.ShardId(5)
|
shardIdToRecover := erasure_coding.ShardId(5)
|
||||||
|
|
||||||
// Create shard locations with only 8 shards (less than DataShardsCount=10)
|
// Create shard locations with only 8 shards (less than DataShardsCount=10)
|
||||||
shardLocations := make(map[erasure_coding.ShardId][]pb.ServerAddress)
|
shardLocations := make(map[erasure_coding.ShardId][]pb.ServerAddress)
|
||||||
for i := 0; i < 8; i++ {
|
for i := 0; i < 8; i++ {
|
||||||
@@ -63,7 +63,7 @@ func TestRecoverOneRemoteEcShardInterval_InsufficientShards(t *testing.T) {
|
|||||||
shardLocations[erasure_coding.ShardId(i)] = []pb.ServerAddress{"localhost:8080"}
|
shardLocations[erasure_coding.ShardId(i)] = []pb.ServerAddress{"localhost:8080"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count available shards
|
// Count available shards
|
||||||
availableCount := 0
|
availableCount := 0
|
||||||
for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
|
for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
|
||||||
@@ -71,12 +71,12 @@ func TestRecoverOneRemoteEcShardInterval_InsufficientShards(t *testing.T) {
|
|||||||
availableCount++
|
availableCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify we don't have enough shards
|
// Verify we don't have enough shards
|
||||||
if availableCount >= erasure_coding.DataShardsCount {
|
if availableCount >= erasure_coding.DataShardsCount {
|
||||||
t.Errorf("Test setup error: expected less than %d shards, got %d", erasure_coding.DataShardsCount, availableCount)
|
t.Errorf("Test setup error: expected less than %d shards, got %d", erasure_coding.DataShardsCount, availableCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Logf("Correctly identified insufficient shards: %d available (need %d)", availableCount, erasure_coding.DataShardsCount)
|
t.Logf("Correctly identified insufficient shards: %d available (need %d)", availableCount, erasure_coding.DataShardsCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -369,20 +369,20 @@ func TestRecoverOneRemoteEcShardInterval_BuggyMaxShardCount(t *testing.T) {
|
|||||||
// This test would have failed with the original buggy code that iterated up to MaxShardCount
|
// This test would have failed with the original buggy code that iterated up to MaxShardCount
|
||||||
// The bug: if bufs[14..31] had non-nil values, they would be counted as "available"
|
// The bug: if bufs[14..31] had non-nil values, they would be counted as "available"
|
||||||
// even though they should be ignored (only indices 0-13 matter for TotalShardsCount=14)
|
// even though they should be ignored (only indices 0-13 matter for TotalShardsCount=14)
|
||||||
|
|
||||||
bufs := make([][]byte, erasure_coding.MaxShardCount)
|
bufs := make([][]byte, erasure_coding.MaxShardCount)
|
||||||
|
|
||||||
// Set up only 9 valid shards (less than DataShardsCount=10)
|
// Set up only 9 valid shards (less than DataShardsCount=10)
|
||||||
for i := 0; i < 9; i++ {
|
for i := 0; i < 9; i++ {
|
||||||
bufs[i] = make([]byte, 1024)
|
bufs[i] = make([]byte, 1024)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CRITICAL: Set garbage data in indices beyond TotalShardsCount
|
// CRITICAL: Set garbage data in indices beyond TotalShardsCount
|
||||||
// The buggy code would count these, making it think we have enough shards
|
// The buggy code would count these, making it think we have enough shards
|
||||||
for i := erasure_coding.TotalShardsCount; i < erasure_coding.MaxShardCount; i++ {
|
for i := erasure_coding.TotalShardsCount; i < erasure_coding.MaxShardCount; i++ {
|
||||||
bufs[i] = make([]byte, 1024) // This should be IGNORED
|
bufs[i] = make([]byte, 1024) // This should be IGNORED
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count using the CORRECTED logic (should only check 0..TotalShardsCount-1)
|
// Count using the CORRECTED logic (should only check 0..TotalShardsCount-1)
|
||||||
availableShards := make([]erasure_coding.ShardId, 0, erasure_coding.TotalShardsCount)
|
availableShards := make([]erasure_coding.ShardId, 0, erasure_coding.TotalShardsCount)
|
||||||
missingShards := make([]erasure_coding.ShardId, 0, erasure_coding.ParityShardsCount+1)
|
missingShards := make([]erasure_coding.ShardId, 0, erasure_coding.ParityShardsCount+1)
|
||||||
@@ -393,16 +393,16 @@ func TestRecoverOneRemoteEcShardInterval_BuggyMaxShardCount(t *testing.T) {
|
|||||||
missingShards = append(missingShards, erasure_coding.ShardId(shardId))
|
missingShards = append(missingShards, erasure_coding.ShardId(shardId))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// With corrected code: should have 9 available shards (insufficient)
|
// With corrected code: should have 9 available shards (insufficient)
|
||||||
if len(availableShards) != 9 {
|
if len(availableShards) != 9 {
|
||||||
t.Errorf("Expected 9 available shards, got %d", len(availableShards))
|
t.Errorf("Expected 9 available shards, got %d", len(availableShards))
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(availableShards) >= erasure_coding.DataShardsCount {
|
if len(availableShards) >= erasure_coding.DataShardsCount {
|
||||||
t.Errorf("CRITICAL BUG: Incorrectly counted buffers beyond TotalShardsCount as available!")
|
t.Errorf("CRITICAL BUG: Incorrectly counted buffers beyond TotalShardsCount as available!")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count using the BUGGY logic (what the old code did)
|
// Count using the BUGGY logic (what the old code did)
|
||||||
buggyAvailableCount := 0
|
buggyAvailableCount := 0
|
||||||
for shardId := 0; shardId < erasure_coding.MaxShardCount; shardId++ {
|
for shardId := 0; shardId < erasure_coding.MaxShardCount; shardId++ {
|
||||||
@@ -410,13 +410,13 @@ func TestRecoverOneRemoteEcShardInterval_BuggyMaxShardCount(t *testing.T) {
|
|||||||
buggyAvailableCount++
|
buggyAvailableCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The buggy code would have counted 9 + 18 = 27 shards (WRONG!)
|
// The buggy code would have counted 9 + 18 = 27 shards (WRONG!)
|
||||||
if buggyAvailableCount != 27 {
|
if buggyAvailableCount != 27 {
|
||||||
t.Errorf("Expected buggy logic to count 27 shards, got %d", buggyAvailableCount)
|
t.Errorf("Expected buggy logic to count 27 shards, got %d", buggyAvailableCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Logf("✅ Corrected code: %d shards (correct, insufficient)", len(availableShards))
|
t.Logf("Corrected code: %d shards (correct, insufficient)", len(availableShards))
|
||||||
t.Logf("❌ Buggy code would have counted: %d shards (incorrect, falsely sufficient)", buggyAvailableCount)
|
t.Logf("Buggy code would have counted: %d shards (incorrect, falsely sufficient)", buggyAvailableCount)
|
||||||
t.Logf("Missing shards: %v", missingShards)
|
t.Logf("Missing shards: %v", missingShards)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user