* handle incomplete ec encoding * unit tests * simplify, and better logs * Update disk_location_ec.go When loadEcShards() fails partway through, some EC shards may already be loaded into the l.ecVolumes map in memory. The previous code only cleaned up filesystem files but left orphaned in-memory state, which could cause memory leaks and inconsistent state. * address comments * Performance: Avoid Double os.Stat() Call * Platform Compatibility: Use filepath.Join * in memory cleanup * Update disk_location_ec.go * refactor * Added Shard Size Validation * check ec shard sizes * validate shard size * calculate expected shard size * refactoring * minor * fix shard directory * 10GB sparse files can be slow or fail on non-sparse FS. Use 10MB to hit SmallBlockSize math (1MB shards) deterministically. * grouping logic should be updated to use both collection and volumeId to ensure correctness * unexpected error * handle exceptions in tests; use constants * The check for orphaned shards should be performed for the previous volume before resetting sameVolumeShards for the new volume. * address comments * Eliminated Redundant Parsing in checkOrphanedShards * minor * Avoid misclassifying local EC as distributed when .dat stat errors occur; also standardize unload-before-remove. * fmt * refactor * refactor * adjust to warning
199 lines
5.5 KiB
Go
199 lines
5.5 KiB
Go
package storage
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
|
|
)
|
|
|
|
// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
|
|
// by actually running EC encoding on real files and comparing the results
|
|
func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) {
|
|
tempDir := t.TempDir()
|
|
|
|
tests := []struct {
|
|
name string
|
|
datFileSize int64
|
|
description string
|
|
}{
|
|
{
|
|
name: "5MB file",
|
|
datFileSize: 5 * 1024 * 1024,
|
|
description: "Small file that needs 1 small block per shard",
|
|
},
|
|
{
|
|
name: "10MB file (exactly 10 small blocks)",
|
|
datFileSize: 10 * 1024 * 1024,
|
|
description: "Exactly fits in 1MB small blocks",
|
|
},
|
|
{
|
|
name: "15MB file",
|
|
datFileSize: 15 * 1024 * 1024,
|
|
description: "Requires 2 small blocks per shard",
|
|
},
|
|
{
|
|
name: "50MB file",
|
|
datFileSize: 50 * 1024 * 1024,
|
|
description: "Requires 5 small blocks per shard",
|
|
},
|
|
{
|
|
name: "100MB file",
|
|
datFileSize: 100 * 1024 * 1024,
|
|
description: "Requires 10 small blocks per shard",
|
|
},
|
|
{
|
|
name: "512MB file",
|
|
datFileSize: 512 * 1024 * 1024,
|
|
description: "Requires 52 small blocks per shard (rounded up)",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// Create a test .dat file with the specified size
|
|
baseFileName := filepath.Join(tempDir, "test_volume")
|
|
datFileName := baseFileName + ".dat"
|
|
|
|
// Create .dat file with random data pattern (so it's compressible but realistic)
|
|
datFile, err := os.Create(datFileName)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create .dat file: %v", err)
|
|
}
|
|
|
|
// Write some pattern data (not all zeros, to be more realistic)
|
|
pattern := make([]byte, 4096)
|
|
for i := range pattern {
|
|
pattern[i] = byte(i % 256)
|
|
}
|
|
|
|
written := int64(0)
|
|
for written < tt.datFileSize {
|
|
toWrite := tt.datFileSize - written
|
|
if toWrite > int64(len(pattern)) {
|
|
toWrite = int64(len(pattern))
|
|
}
|
|
n, err := datFile.Write(pattern[:toWrite])
|
|
if err != nil {
|
|
t.Fatalf("Failed to write to .dat file: %v", err)
|
|
}
|
|
written += int64(n)
|
|
}
|
|
datFile.Close()
|
|
|
|
// Calculate expected shard size using our function
|
|
expectedShardSize := calculateExpectedShardSize(tt.datFileSize)
|
|
|
|
// Run actual EC encoding
|
|
err = erasure_coding.WriteEcFiles(baseFileName)
|
|
if err != nil {
|
|
t.Fatalf("Failed to encode EC files: %v", err)
|
|
}
|
|
|
|
// Measure actual shard sizes
|
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
|
|
shardFileName := baseFileName + erasure_coding.ToExt(i)
|
|
shardInfo, err := os.Stat(shardFileName)
|
|
if err != nil {
|
|
t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err)
|
|
}
|
|
|
|
actualShardSize := shardInfo.Size()
|
|
|
|
// Verify actual size matches expected size
|
|
if actualShardSize != expectedShardSize {
|
|
t.Errorf("Shard %d size mismatch:\n"+
|
|
" .dat file size: %d bytes\n"+
|
|
" Expected shard size: %d bytes\n"+
|
|
" Actual shard size: %d bytes\n"+
|
|
" Difference: %d bytes\n"+
|
|
" %s",
|
|
i, tt.datFileSize, expectedShardSize, actualShardSize,
|
|
actualShardSize-expectedShardSize, tt.description)
|
|
}
|
|
}
|
|
|
|
// If we got here, all shards match!
|
|
t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)",
|
|
tt.datFileSize, expectedShardSize, tt.description)
|
|
|
|
// Cleanup
|
|
os.Remove(datFileName)
|
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
|
|
os.Remove(baseFileName + erasure_coding.ToExt(i))
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding
|
|
func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) {
|
|
tempDir := t.TempDir()
|
|
|
|
tests := []struct {
|
|
name string
|
|
datFileSize int64
|
|
}{
|
|
{"1 byte file", 1},
|
|
{"1KB file", 1024},
|
|
{"10KB file", 10 * 1024},
|
|
{"1MB file (1 small block)", 1024 * 1024},
|
|
{"1MB + 1 byte", 1024*1024 + 1},
|
|
{"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024},
|
|
{"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
baseFileName := filepath.Join(tempDir, tt.name)
|
|
datFileName := baseFileName + ".dat"
|
|
|
|
// Create .dat file
|
|
datFile, err := os.Create(datFileName)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create .dat file: %v", err)
|
|
}
|
|
|
|
// Write exactly the specified number of bytes
|
|
data := make([]byte, tt.datFileSize)
|
|
for i := range data {
|
|
data[i] = byte(i % 256)
|
|
}
|
|
datFile.Write(data)
|
|
datFile.Close()
|
|
|
|
// Calculate expected
|
|
expectedShardSize := calculateExpectedShardSize(tt.datFileSize)
|
|
|
|
// Run actual EC encoding
|
|
err = erasure_coding.WriteEcFiles(baseFileName)
|
|
if err != nil {
|
|
t.Fatalf("Failed to encode EC files: %v", err)
|
|
}
|
|
|
|
// Check first shard (all should be same size)
|
|
shardFileName := baseFileName + erasure_coding.ToExt(0)
|
|
shardInfo, err := os.Stat(shardFileName)
|
|
if err != nil {
|
|
t.Fatalf("Failed to stat shard file: %v", err)
|
|
}
|
|
|
|
actualShardSize := shardInfo.Size()
|
|
|
|
if actualShardSize != expectedShardSize {
|
|
t.Errorf("File size %d: expected shard %d, got %d (diff: %d)",
|
|
tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize)
|
|
} else {
|
|
t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize)
|
|
}
|
|
|
|
// Cleanup
|
|
os.Remove(datFileName)
|
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
|
|
os.Remove(baseFileName + erasure_coding.ToExt(i))
|
|
}
|
|
})
|
|
}
|
|
}
|