* feat(s3): add concurrent chunk prefetch for large file downloads Add a pipe-based prefetch pipeline that overlaps chunk fetching with response writing during S3 GetObject, SSE downloads, and filer proxy. While chunk N streams to the HTTP response, fetch goroutines for the next K chunks establish HTTP connections to volume servers ahead of time, eliminating the RTT gap between sequential chunk fetches. Uses io.Pipe for minimal memory overhead (~1MB per download regardless of chunk size, vs buffering entire chunks). Also increases the streaming read buffer from 64KB to 256KB to reduce syscall overhead. Benchmark results (64KB chunks, prefetch=4): - 0ms latency: 1058 → 2362 MB/s (2.2× faster) - 5ms latency: 11.0 → 41.7 MB/s (3.8× faster) - 10ms latency: 5.9 → 23.3 MB/s (4.0× faster) - 20ms latency: 3.1 → 12.1 MB/s (3.9× faster) * fix: address review feedback for prefetch pipeline - Fix data race: use *chunkPipeResult (pointer) on channel to avoid copying struct while fetch goroutines write to it. Confirmed clean with -race detector. - Remove concurrent map write: retryWithCacheInvalidation no longer updates fileId2Url map. Producer only reads it; consumer never writes. - Use mem.Allocate/mem.Free for copy buffer to reduce GC pressure. - Add local cancellable context so consumer errors (client disconnect) immediately stop the producer and all in-flight fetch goroutines. * fix(test): remove dead code and add Range header support in test server - Remove unused allData variable in makeChunksAndServer - Add Range header handling to createTestServer for partial chunk read coverage (206 Partial Content, 416 Range Not Satisfiable) * fix: correct retry condition and goroutine leak in prefetch pipeline - Fix retry condition: use result.fetchErr/result.written instead of copied to decide cache-invalidation retry. The old condition wrongly triggered retry when the fetch succeeded but the response writer failed on the first write (copied==0 despite fetcher having data). Now matches the sequential path (stream.go:197) which checks whether the fetcher itself wrote zero bytes. - Fix goroutine leak: when the producer's send to the results channel is interrupted by context cancellation, the fetch goroutine was already launched but the result was never sent to the channel. The drain loop couldn't handle it. Now waits on result.done before returning so every fetch goroutine is properly awaited.
318 lines
8.6 KiB
Go
318 lines
8.6 KiB
Go
package filer
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"math/rand"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/wdclient"
|
|
)
|
|
|
|
func TestMain(m *testing.M) {
|
|
// Initialize the global HTTP client required by ReadUrlAsStream
|
|
util_http.InitGlobalHttpClient()
|
|
os.Exit(m.Run())
|
|
}
|
|
|
|
// mockMasterClientForBenchmark implements HasLookupFileIdFunction and CacheInvalidator
|
|
type mockMasterClientForBenchmark struct {
|
|
urls map[string][]string
|
|
}
|
|
|
|
func (m *mockMasterClientForBenchmark) GetLookupFileIdFunction() wdclient.LookupFileIdFunctionType {
|
|
return func(ctx context.Context, fileId string) ([]string, error) {
|
|
if urls, ok := m.urls[fileId]; ok {
|
|
return urls, nil
|
|
}
|
|
return nil, fmt.Errorf("fileId %s not found", fileId)
|
|
}
|
|
}
|
|
|
|
func (m *mockMasterClientForBenchmark) InvalidateCache(fileId string) {}
|
|
|
|
// noopJwtFunc returns empty JWT for testing
|
|
func noopJwtFunc(fileId string) string {
|
|
return ""
|
|
}
|
|
|
|
// createMockVolumeServer creates an httptest server that serves chunk data
|
|
// with configurable per-request latency to simulate network conditions.
|
|
// The latency is applied once per request (simulating RTT), not per byte.
|
|
func createMockVolumeServer(chunkData map[string][]byte, latency time.Duration) *httptest.Server {
|
|
var mu sync.RWMutex
|
|
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
// Simulate network latency (connection setup + RTT)
|
|
if latency > 0 {
|
|
time.Sleep(latency)
|
|
}
|
|
|
|
// Extract fileId from path (e.g., "/1,abc123")
|
|
path := r.URL.Path
|
|
if strings.HasPrefix(path, "/") {
|
|
path = path[1:]
|
|
}
|
|
|
|
mu.RLock()
|
|
data, ok := chunkData[path]
|
|
mu.RUnlock()
|
|
if !ok {
|
|
http.Error(w, "not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Handle Range header
|
|
rangeHeader := r.Header.Get("Range")
|
|
if rangeHeader != "" {
|
|
var start, end int64
|
|
fmt.Sscanf(rangeHeader, "bytes=%d-%d", &start, &end)
|
|
if start >= 0 && end < int64(len(data)) && start <= end {
|
|
w.Header().Set("Content-Length", fmt.Sprintf("%d", end-start+1))
|
|
w.WriteHeader(http.StatusPartialContent)
|
|
w.Write(data[start : end+1])
|
|
return
|
|
}
|
|
}
|
|
|
|
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data)))
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write(data)
|
|
}))
|
|
}
|
|
|
|
// benchmarkConfig holds parameters for a single benchmark scenario
|
|
type benchmarkConfig struct {
|
|
numChunks int
|
|
chunkSize int
|
|
latency time.Duration
|
|
prefetch int // 0 = sequential
|
|
}
|
|
|
|
func (c benchmarkConfig) name() string {
|
|
name := fmt.Sprintf("chunks=%d/size=%dKB/latency=%dms",
|
|
c.numChunks, c.chunkSize/1024, c.latency.Milliseconds())
|
|
if c.prefetch > 0 {
|
|
name += fmt.Sprintf("/prefetch=%d", c.prefetch)
|
|
}
|
|
return name
|
|
}
|
|
|
|
// setupBenchmark creates mock infrastructure and returns chunks, master client, and cleanup func
|
|
func setupBenchmark(b *testing.B, cfg benchmarkConfig) ([]*filer_pb.FileChunk, *mockMasterClientForBenchmark, func()) {
|
|
b.Helper()
|
|
|
|
// Generate random chunk data
|
|
chunkData := make(map[string][]byte, cfg.numChunks)
|
|
chunks := make([]*filer_pb.FileChunk, cfg.numChunks)
|
|
|
|
for i := 0; i < cfg.numChunks; i++ {
|
|
fileId := fmt.Sprintf("1,%x", i)
|
|
data := make([]byte, cfg.chunkSize)
|
|
rand.Read(data)
|
|
chunkData[fileId] = data
|
|
|
|
chunks[i] = &filer_pb.FileChunk{
|
|
FileId: fileId,
|
|
Offset: int64(i * cfg.chunkSize),
|
|
Size: uint64(cfg.chunkSize),
|
|
ModifiedTsNs: int64(i),
|
|
Fid: &filer_pb.FileId{FileKey: uint64(i)},
|
|
}
|
|
}
|
|
|
|
// Start mock volume server
|
|
server := createMockVolumeServer(chunkData, cfg.latency)
|
|
|
|
// Build URL map
|
|
urls := make(map[string][]string, cfg.numChunks)
|
|
for i := 0; i < cfg.numChunks; i++ {
|
|
fileId := fmt.Sprintf("1,%x", i)
|
|
urls[fileId] = []string{server.URL + "/" + fileId}
|
|
}
|
|
|
|
masterClient := &mockMasterClientForBenchmark{urls: urls}
|
|
cleanup := func() { server.Close() }
|
|
|
|
return chunks, masterClient, cleanup
|
|
}
|
|
|
|
// runSequentialBenchmark runs the current sequential streaming path
|
|
func runSequentialBenchmark(b *testing.B, cfg benchmarkConfig) {
|
|
chunks, masterClient, cleanup := setupBenchmark(b, cfg)
|
|
defer cleanup()
|
|
|
|
totalSize := int64(cfg.numChunks * cfg.chunkSize)
|
|
|
|
b.ResetTimer()
|
|
b.SetBytes(totalSize)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
streamFn, err := PrepareStreamContentWithThrottler(
|
|
context.Background(),
|
|
masterClient,
|
|
noopJwtFunc,
|
|
chunks,
|
|
0,
|
|
totalSize,
|
|
0, // no throttle
|
|
)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if err := streamFn(io.Discard); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkStreamSequential benchmarks the current sequential streaming path.
|
|
// This provides the BEFORE baseline for comparison.
|
|
func BenchmarkStreamSequential(b *testing.B) {
|
|
configs := []benchmarkConfig{
|
|
// Pure throughput (no latency)
|
|
{numChunks: 16, chunkSize: 64 * 1024, latency: 0},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 0},
|
|
// Moderate latency — shows RTT gap overhead
|
|
{numChunks: 16, chunkSize: 64 * 1024, latency: 5 * time.Millisecond},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 5 * time.Millisecond},
|
|
// High latency — significant RTT overhead
|
|
{numChunks: 16, chunkSize: 64 * 1024, latency: 20 * time.Millisecond},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 10 * time.Millisecond},
|
|
}
|
|
|
|
for _, cfg := range configs {
|
|
b.Run(cfg.name(), func(b *testing.B) {
|
|
runSequentialBenchmark(b, cfg)
|
|
})
|
|
}
|
|
}
|
|
|
|
// BenchmarkStreamSequentialVerify is a quick functional test that the benchmark
|
|
// infrastructure works correctly — ensures data integrity through the pipeline.
|
|
func BenchmarkStreamSequentialVerify(b *testing.B) {
|
|
cfg := benchmarkConfig{numChunks: 4, chunkSize: 1024, latency: 0}
|
|
chunks, masterClient, cleanup := setupBenchmark(b, cfg)
|
|
defer cleanup()
|
|
|
|
totalSize := int64(cfg.numChunks * cfg.chunkSize)
|
|
|
|
streamFn, err := PrepareStreamContentWithThrottler(
|
|
context.Background(),
|
|
masterClient,
|
|
noopJwtFunc,
|
|
chunks,
|
|
0,
|
|
totalSize,
|
|
0,
|
|
)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := streamFn(&buf); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
if buf.Len() != int(totalSize) {
|
|
b.Fatalf("expected %d bytes, got %d", totalSize, buf.Len())
|
|
}
|
|
}
|
|
|
|
// runPrefetchBenchmark runs the new prefetch streaming path
|
|
func runPrefetchBenchmark(b *testing.B, cfg benchmarkConfig) {
|
|
chunks, masterClient, cleanup := setupBenchmark(b, cfg)
|
|
defer cleanup()
|
|
|
|
totalSize := int64(cfg.numChunks * cfg.chunkSize)
|
|
|
|
b.ResetTimer()
|
|
b.SetBytes(totalSize)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
streamFn, err := PrepareStreamContentWithPrefetch(
|
|
context.Background(),
|
|
masterClient,
|
|
noopJwtFunc,
|
|
chunks,
|
|
0,
|
|
totalSize,
|
|
0, // no throttle
|
|
cfg.prefetch,
|
|
)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if err := streamFn(io.Discard); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkStreamPrefetch benchmarks the new prefetch streaming path.
|
|
// Compare against BenchmarkStreamSequential for the AFTER measurement.
|
|
func BenchmarkStreamPrefetch(b *testing.B) {
|
|
configs := []benchmarkConfig{
|
|
// Pure throughput (no latency) — should be similar to sequential
|
|
{numChunks: 16, chunkSize: 64 * 1024, latency: 0, prefetch: 4},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 0, prefetch: 4},
|
|
// Moderate latency — prefetch should eliminate most RTT overhead
|
|
{numChunks: 16, chunkSize: 64 * 1024, latency: 5 * time.Millisecond, prefetch: 4},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 5 * time.Millisecond, prefetch: 4},
|
|
// High latency — most benefit from prefetch
|
|
{numChunks: 16, chunkSize: 64 * 1024, latency: 20 * time.Millisecond, prefetch: 4},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 10 * time.Millisecond, prefetch: 4},
|
|
// Vary prefetch count with moderate latency
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 5 * time.Millisecond, prefetch: 2},
|
|
{numChunks: 64, chunkSize: 64 * 1024, latency: 5 * time.Millisecond, prefetch: 8},
|
|
}
|
|
|
|
for _, cfg := range configs {
|
|
b.Run(cfg.name(), func(b *testing.B) {
|
|
runPrefetchBenchmark(b, cfg)
|
|
})
|
|
}
|
|
}
|
|
|
|
// BenchmarkStreamPrefetchVerify verifies data integrity through the prefetch pipeline.
|
|
func BenchmarkStreamPrefetchVerify(b *testing.B) {
|
|
cfg := benchmarkConfig{numChunks: 4, chunkSize: 1024, latency: 0, prefetch: 4}
|
|
chunks, masterClient, cleanup := setupBenchmark(b, cfg)
|
|
defer cleanup()
|
|
|
|
totalSize := int64(cfg.numChunks * cfg.chunkSize)
|
|
|
|
streamFn, err := PrepareStreamContentWithPrefetch(
|
|
context.Background(),
|
|
masterClient,
|
|
noopJwtFunc,
|
|
chunks,
|
|
0,
|
|
totalSize,
|
|
0,
|
|
cfg.prefetch,
|
|
)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := streamFn(&buf); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
if buf.Len() != int(totalSize) {
|
|
b.Fatalf("expected %d bytes, got %d", totalSize, buf.Len())
|
|
}
|
|
}
|