filer: improve FoundationDB performance by disabling batch by default (#7770)
* filer: improve FoundationDB performance by disabling batch by default

  This PR addresses a performance issue where the FoundationDB filer was achieving only ~757 ops/sec with 12 concurrent S3 clients, despite FDB being capable of 17,000+ ops/sec.

  Root cause: The write batcher was waiting up to 5ms for each operation to batch, even though S3 semantics require waiting for durability confirmation. This added artificial latency that defeated the purpose of batching.

  Changes:
  - Disable write batching by default (batch_enabled = false)
  - Each write now commits immediately in its own transaction
  - Reduce batch interval from 5ms to 1ms when batching is enabled
  - Add batch_enabled config option to toggle behavior
  - Improve batcher to collect available ops without blocking
  - Add benchmarks comparing batch vs no-batch performance

  Benchmark results (16 concurrent goroutines):
  - With batch: 2,924 ops/sec (342,032 ns/op)
  - Without batch: 4,625 ops/sec (216,219 ns/op)
  - Improvement: +58% faster

  Configuration:
  - Default: batch_enabled = false (optimal for S3 PUT latency)
  - For bulk ingestion: set batch_enabled = true

  Also fixes ARM64 Docker test setup (shell compatibility, fdbserver path).

* fix: address review comments - use atomic counter and remove duplicate batcher

  - Use sync/atomic.Uint64 for unique filenames in concurrent benchmarks
  - Remove duplicate batcher creation in createBenchmarkStoreWithBatching (initialize() already creates batcher when batchEnabled=true)

* fix: add realistic default values to benchmark store helper

  Set directoryPrefix, timeout, and maxRetryDelay to reasonable defaults for more realistic benchmark conditions.
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -325,6 +326,120 @@ func BenchmarkFoundationDBStore_KvOperations(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkFoundationDBStore_InsertEntry_NoBatch benchmarks insert performance
|
||||
// with batching disabled (direct commit mode - optimal for S3 PUT latency)
|
||||
func BenchmarkFoundationDBStore_InsertEntry_NoBatch(b *testing.B) {
|
||||
store := createBenchmarkStoreWithBatching(b, false, 100, 1*time.Millisecond)
|
||||
defer store.Shutdown()
|
||||
|
||||
ctx := context.Background()
|
||||
entry := &filer.Entry{
|
||||
FullPath: "/benchmark_nobatch/file.txt",
|
||||
Attr: filer.Attr{
|
||||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
Mtime: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
entry.FullPath = util.NewFullPath("/benchmark_nobatch", fmt.Sprintf("%x", uint64(i))+".txt")
|
||||
err := store.InsertEntry(ctx, entry)
|
||||
if err != nil {
|
||||
b.Fatalf("InsertEntry failed: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkFoundationDBStore_InsertEntry_WithBatch benchmarks insert performance
|
||||
// with batching enabled (higher throughput for bulk ingestion)
|
||||
func BenchmarkFoundationDBStore_InsertEntry_WithBatch(b *testing.B) {
|
||||
store := createBenchmarkStoreWithBatching(b, true, 100, 1*time.Millisecond)
|
||||
defer store.Shutdown()
|
||||
|
||||
ctx := context.Background()
|
||||
entry := &filer.Entry{
|
||||
FullPath: "/benchmark_batch/file.txt",
|
||||
Attr: filer.Attr{
|
||||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
Mtime: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
entry.FullPath = util.NewFullPath("/benchmark_batch", fmt.Sprintf("%x", uint64(i))+".txt")
|
||||
err := store.InsertEntry(ctx, entry)
|
||||
if err != nil {
|
||||
b.Fatalf("InsertEntry failed: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkFoundationDBStore_ConcurrentInsert_NoBatch benchmarks concurrent inserts
|
||||
// with batching disabled (simulates S3 PUT concurrency)
|
||||
func BenchmarkFoundationDBStore_ConcurrentInsert_NoBatch(b *testing.B) {
|
||||
store := createBenchmarkStoreWithBatching(b, false, 100, 1*time.Millisecond)
|
||||
defer store.Shutdown()
|
||||
|
||||
var counter atomic.Uint64
|
||||
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
ctx := context.Background()
|
||||
for pb.Next() {
|
||||
n := counter.Add(1)
|
||||
entry := &filer.Entry{
|
||||
FullPath: util.NewFullPath("/benchmark_concurrent_nobatch", fmt.Sprintf("%d.txt", n)),
|
||||
Attr: filer.Attr{
|
||||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
Mtime: time.Now(),
|
||||
},
|
||||
}
|
||||
err := store.InsertEntry(ctx, entry)
|
||||
if err != nil {
|
||||
b.Fatalf("InsertEntry failed: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkFoundationDBStore_ConcurrentInsert_WithBatch benchmarks concurrent inserts
|
||||
// with batching enabled (tests batch efficiency under concurrent load)
|
||||
func BenchmarkFoundationDBStore_ConcurrentInsert_WithBatch(b *testing.B) {
|
||||
store := createBenchmarkStoreWithBatching(b, true, 100, 1*time.Millisecond)
|
||||
defer store.Shutdown()
|
||||
|
||||
var counter atomic.Uint64
|
||||
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
ctx := context.Background()
|
||||
for pb.Next() {
|
||||
n := counter.Add(1)
|
||||
entry := &filer.Entry{
|
||||
FullPath: util.NewFullPath("/benchmark_concurrent_batch", fmt.Sprintf("%d.txt", n)),
|
||||
Attr: filer.Attr{
|
||||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
Mtime: time.Now(),
|
||||
},
|
||||
}
|
||||
err := store.InsertEntry(ctx, entry)
|
||||
if err != nil {
|
||||
b.Fatalf("InsertEntry failed: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
func getTestClusterFile() string {
|
||||
clusterFile := os.Getenv("FDB_CLUSTER_FILE")
|
||||
@@ -349,6 +464,32 @@ func createBenchmarkStore(b *testing.B) *FoundationDBStore {
|
||||
return store
|
||||
}
|
||||
|
||||
// createBenchmarkStoreWithBatching creates a store with specific batching configuration
|
||||
// for comparing performance between batched and non-batched modes
|
||||
func createBenchmarkStoreWithBatching(b *testing.B, batchEnabled bool, batchSize int, batchInterval time.Duration) *FoundationDBStore {
|
||||
clusterFile := getTestClusterFile()
|
||||
if _, err := os.Stat(clusterFile); os.IsNotExist(err) {
|
||||
b.Skip("FoundationDB cluster file not found, skipping benchmark")
|
||||
}
|
||||
|
||||
store := &FoundationDBStore{
|
||||
batchEnabled: batchEnabled,
|
||||
batchSize: batchSize,
|
||||
batchInterval: batchInterval,
|
||||
directoryPrefix: "benchmark",
|
||||
timeout: 5 * time.Second,
|
||||
maxRetryDelay: 1 * time.Second,
|
||||
}
|
||||
err := store.initialize(clusterFile, 740)
|
||||
if err != nil {
|
||||
b.Skipf("Failed to initialize FoundationDB store: %v", err)
|
||||
}
|
||||
|
||||
// Note: initialize() already creates the batcher if batchEnabled is true
|
||||
|
||||
return store
|
||||
}
|
||||
|
||||
func getTestStore(t *testing.T) *FoundationDBStore {
|
||||
t.Helper()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user