* chore: remove unreachable dead code across the codebase Remove ~50,000 lines of unreachable code identified by static analysis. Major removals: - weed/filer/redis_lua: entire unused Redis Lua filer store implementation - weed/wdclient/net2, resource_pool: unused connection/resource pool packages - weed/plugin/worker/lifecycle: unused lifecycle plugin worker - weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy, multipart IAM, key rotation, and various SSE helper functions - weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions - weed/mq/offset: unused SQL storage and migration code - weed/worker: unused registry, task, and monitoring functions - weed/query: unused SQL engine, parquet scanner, and type functions - weed/shell: unused EC proportional rebalance functions - weed/storage/erasure_coding/distribution: unused distribution analysis functions - Individual unreachable functions removed from 150+ files across admin, credential, filer, iam, kms, mount, mq, operation, pb, s3api, server, shell, storage, topology, and util packages * fix(s3): reset shared memory store in IAM test to prevent flaky failure TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because the MemoryStore credential backend is a singleton registered via init(). Earlier tests that create anonymous identities pollute the shared store, causing LookupAnonymous() to unexpectedly return true. Fix by calling Reset() on the memory store before the test runs. * style: run gofmt on changed files * fix: restore KMS functions used by integration tests * fix(plugin): prevent panic on send to closed worker session channel The Plugin.sendToWorker method could panic with "send on closed channel" when a worker disconnected while a message was being sent. The race was between streamSession.close() closing the outgoing channel and sendToWorker writing to it concurrently. 
Add a done channel to streamSession that is closed before the outgoing channel, and check it in sendToWorker's select to safely detect closed sessions without panicking.
374 lines
11 KiB
Go
374 lines
11 KiB
Go
package mount
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"strings"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/wdclient"
|
|
)
|
|
|
|
// RDMAMountClient provides RDMA acceleration for SeaweedFS mount operations
|
|
type RDMAMountClient struct {
|
|
sidecarAddr string
|
|
httpClient *http.Client
|
|
maxConcurrent int
|
|
timeout time.Duration
|
|
semaphore chan struct{}
|
|
|
|
// Volume lookup
|
|
lookupFileIdFn wdclient.LookupFileIdFunctionType
|
|
|
|
// Statistics
|
|
totalRequests atomic.Int64
|
|
successfulReads atomic.Int64
|
|
failedReads atomic.Int64
|
|
totalBytesRead atomic.Int64
|
|
totalLatencyNs atomic.Int64
|
|
}
|
|
|
|
// RDMAReadRequest is the JSON shape of a single RDMA read request. The
// first three fields name the needle; Offset and Size name the range
// requested from it.
type RDMAReadRequest struct {
	VolumeID uint32 `json:"volume_id"`
	NeedleID uint64 `json:"needle_id"`
	Cookie   uint32 `json:"cookie"`
	Offset   uint64 `json:"offset"`
	Size     uint64 `json:"size"`
}
|
|
|
|
// RDMAReadResponse is the JSON shape of the sidecar's answer to an RDMA
// read request.
type RDMAReadResponse struct {
	// Outcome of the read and where the data came from.
	Success   bool   `json:"success"`
	IsRDMA    bool   `json:"is_rdma"`
	Source    string `json:"source"`
	Duration  string `json:"duration"`
	DataSize  int    `json:"data_size"`
	SessionID string `json:"session_id,omitempty"`
	ErrorMsg  string `json:"error,omitempty"`

	// Temp-file hand-off used by the zero-copy optimization.
	UseTempFile bool   `json:"use_temp_file"`
	TempFile    string `json:"temp_file"`
}
|
|
|
|
// RDMAHealthResponse is the JSON shape returned by the sidecar's /health
// endpoint.
type RDMAHealthResponse struct {
	// Status is compared against "healthy" by the client's health check.
	Status string `json:"status"`

	// RDMA reports whether RDMA is enabled on the sidecar and whether it is
	// currently connected.
	RDMA struct {
		Enabled   bool `json:"enabled"`
		Connected bool `json:"connected"`
	} `json:"rdma"`

	Timestamp string `json:"timestamp"`
}
|
|
|
|
// NewRDMAMountClient creates a new RDMA client for mount operations
|
|
func NewRDMAMountClient(sidecarAddr string, lookupFileIdFn wdclient.LookupFileIdFunctionType, maxConcurrent int, timeoutMs int) (*RDMAMountClient, error) {
|
|
client := &RDMAMountClient{
|
|
sidecarAddr: sidecarAddr,
|
|
maxConcurrent: maxConcurrent,
|
|
timeout: time.Duration(timeoutMs) * time.Millisecond,
|
|
httpClient: &http.Client{
|
|
Timeout: time.Duration(timeoutMs) * time.Millisecond,
|
|
},
|
|
semaphore: make(chan struct{}, maxConcurrent),
|
|
lookupFileIdFn: lookupFileIdFn,
|
|
}
|
|
|
|
// Test connectivity and RDMA availability
|
|
if err := client.healthCheck(); err != nil {
|
|
return nil, fmt.Errorf("RDMA sidecar health check failed: %w", err)
|
|
}
|
|
|
|
glog.Infof("RDMA mount client initialized: sidecar=%s, maxConcurrent=%d, timeout=%v",
|
|
sidecarAddr, maxConcurrent, client.timeout)
|
|
|
|
return client, nil
|
|
}
|
|
|
|
// lookupVolumeLocationByFileID finds the best volume server for a given file ID
|
|
func (c *RDMAMountClient) lookupVolumeLocationByFileID(ctx context.Context, fileID string) (string, error) {
|
|
glog.V(4).Infof("Looking up volume location for file ID %s", fileID)
|
|
|
|
targetUrls, err := c.lookupFileIdFn(ctx, fileID)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to lookup volume for file %s: %w", fileID, err)
|
|
}
|
|
|
|
if len(targetUrls) == 0 {
|
|
return "", fmt.Errorf("no locations found for file %s", fileID)
|
|
}
|
|
|
|
// Choose the first URL and extract the server address
|
|
targetUrl := targetUrls[0]
|
|
// Extract server address from URL like "http://server:port/fileId"
|
|
parts := strings.Split(targetUrl, "/")
|
|
if len(parts) < 3 {
|
|
return "", fmt.Errorf("invalid target URL format: %s", targetUrl)
|
|
}
|
|
bestAddress := fmt.Sprintf("http://%s", parts[2])
|
|
|
|
glog.V(4).Infof("File %s located at %s", fileID, bestAddress)
|
|
return bestAddress, nil
|
|
}
|
|
|
|
// healthCheck verifies that the RDMA sidecar is available and functioning
|
|
func (c *RDMAMountClient) healthCheck() error {
|
|
ctx, cancel := context.WithTimeout(context.Background(), c.timeout)
|
|
defer cancel()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET",
|
|
fmt.Sprintf("http://%s/health", c.sidecarAddr), nil)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create health check request: %w", err)
|
|
}
|
|
|
|
resp, err := c.httpClient.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("health check request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return fmt.Errorf("health check failed with status: %s", resp.Status)
|
|
}
|
|
|
|
// Parse health response
|
|
var health RDMAHealthResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&health); err != nil {
|
|
return fmt.Errorf("failed to parse health response: %w", err)
|
|
}
|
|
|
|
if health.Status != "healthy" {
|
|
return fmt.Errorf("sidecar reports unhealthy status: %s", health.Status)
|
|
}
|
|
|
|
if !health.RDMA.Enabled {
|
|
return fmt.Errorf("RDMA is not enabled on sidecar")
|
|
}
|
|
|
|
if !health.RDMA.Connected {
|
|
glog.Warningf("RDMA sidecar is healthy but not connected to RDMA engine")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ReadNeedle reads data from a specific needle using RDMA acceleration
|
|
func (c *RDMAMountClient) ReadNeedle(ctx context.Context, fileID string, offset, size uint64) ([]byte, bool, error) {
|
|
// Acquire semaphore for concurrency control
|
|
select {
|
|
case c.semaphore <- struct{}{}:
|
|
defer func() { <-c.semaphore }()
|
|
case <-ctx.Done():
|
|
return nil, false, ctx.Err()
|
|
}
|
|
|
|
c.totalRequests.Add(1)
|
|
startTime := time.Now()
|
|
|
|
// Lookup volume location using file ID directly
|
|
volumeServer, err := c.lookupVolumeLocationByFileID(ctx, fileID)
|
|
if err != nil {
|
|
c.failedReads.Add(1)
|
|
return nil, false, fmt.Errorf("failed to lookup volume for file %s: %w", fileID, err)
|
|
}
|
|
|
|
// Prepare request URL with file_id parameter (simpler than individual components)
|
|
reqURL := fmt.Sprintf("http://%s/read?file_id=%s&offset=%d&size=%d&volume_server=%s",
|
|
c.sidecarAddr, fileID, offset, size, volumeServer)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
|
|
if err != nil {
|
|
c.failedReads.Add(1)
|
|
return nil, false, fmt.Errorf("failed to create RDMA request: %w", err)
|
|
}
|
|
|
|
// Execute request
|
|
resp, err := c.httpClient.Do(req)
|
|
if err != nil {
|
|
c.failedReads.Add(1)
|
|
return nil, false, fmt.Errorf("RDMA request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
duration := time.Since(startTime)
|
|
c.totalLatencyNs.Add(duration.Nanoseconds())
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
c.failedReads.Add(1)
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return nil, false, fmt.Errorf("RDMA read failed with status %s: %s", resp.Status, string(body))
|
|
}
|
|
|
|
// Check if response indicates RDMA was used
|
|
contentType := resp.Header.Get("Content-Type")
|
|
isRDMA := strings.Contains(resp.Header.Get("X-Source"), "rdma") ||
|
|
resp.Header.Get("X-RDMA-Used") == "true"
|
|
|
|
// Check for zero-copy temp file optimization
|
|
tempFilePath := resp.Header.Get("X-Temp-File")
|
|
useTempFile := resp.Header.Get("X-Use-Temp-File") == "true"
|
|
|
|
var data []byte
|
|
|
|
var n int
|
|
if useTempFile && tempFilePath != "" {
|
|
// Zero-copy path: read from temp file (page cache)
|
|
glog.V(4).Infof("🔥 Using zero-copy temp file: %s", tempFilePath)
|
|
|
|
// Allocate buffer for temp file read
|
|
var bufferSize uint64 = 1024 * 1024 // Default 1MB
|
|
if size > 0 {
|
|
bufferSize = size
|
|
}
|
|
buffer := make([]byte, bufferSize)
|
|
|
|
n, err = c.readFromTempFile(tempFilePath, buffer)
|
|
if err != nil {
|
|
glog.V(2).Infof("Zero-copy failed, falling back to HTTP body: %v", err)
|
|
// Fall back to reading HTTP body
|
|
data, err = io.ReadAll(resp.Body)
|
|
} else {
|
|
data = buffer[:n]
|
|
glog.V(4).Infof("🔥 Zero-copy successful: %d bytes from page cache", n)
|
|
}
|
|
|
|
// Important: Cleanup temp file after reading (consumer responsibility)
|
|
// This prevents accumulation of temp files in /tmp/rdma-cache
|
|
go c.cleanupTempFile(tempFilePath)
|
|
} else {
|
|
// Regular path: read from HTTP response body
|
|
data, err = io.ReadAll(resp.Body)
|
|
}
|
|
|
|
if err != nil {
|
|
c.failedReads.Add(1)
|
|
return nil, false, fmt.Errorf("failed to read RDMA response: %w", err)
|
|
}
|
|
|
|
c.successfulReads.Add(1)
|
|
c.totalBytesRead.Add(int64(len(data)))
|
|
|
|
// Log successful operation
|
|
glog.V(4).Infof("RDMA read completed: fileID=%s, size=%d, duration=%v, rdma=%v, contentType=%s",
|
|
fileID, size, duration, isRDMA, contentType)
|
|
|
|
return data, isRDMA, nil
|
|
}
|
|
|
|
// cleanupTempFile requests cleanup of a temp file from the sidecar
|
|
func (c *RDMAMountClient) cleanupTempFile(tempFilePath string) {
|
|
if tempFilePath == "" {
|
|
return
|
|
}
|
|
|
|
// Give the page cache a brief moment to be utilized before cleanup
|
|
// This preserves the zero-copy performance window
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Call sidecar cleanup endpoint
|
|
cleanupURL := fmt.Sprintf("http://%s/cleanup?temp_file=%s", c.sidecarAddr, url.QueryEscape(tempFilePath))
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "DELETE", cleanupURL, nil)
|
|
if err != nil {
|
|
glog.V(2).Infof("Failed to create cleanup request for %s: %v", tempFilePath, err)
|
|
return
|
|
}
|
|
|
|
resp, err := c.httpClient.Do(req)
|
|
if err != nil {
|
|
glog.V(2).Infof("Failed to cleanup temp file %s: %v", tempFilePath, err)
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode == http.StatusOK {
|
|
glog.V(4).Infof("🧹 Temp file cleaned up: %s", tempFilePath)
|
|
} else {
|
|
glog.V(2).Infof("Cleanup failed for %s: status %s", tempFilePath, resp.Status)
|
|
}
|
|
}
|
|
|
|
// GetStats returns current RDMA client statistics
|
|
func (c *RDMAMountClient) GetStats() map[string]interface{} {
|
|
totalRequests := c.totalRequests.Load()
|
|
successfulReads := c.successfulReads.Load()
|
|
failedReads := c.failedReads.Load()
|
|
totalBytesRead := c.totalBytesRead.Load()
|
|
totalLatencyNs := c.totalLatencyNs.Load()
|
|
|
|
successRate := float64(0)
|
|
avgLatencyNs := int64(0)
|
|
|
|
if totalRequests > 0 {
|
|
successRate = float64(successfulReads) / float64(totalRequests) * 100
|
|
avgLatencyNs = totalLatencyNs / totalRequests
|
|
}
|
|
|
|
return map[string]interface{}{
|
|
"sidecar_addr": c.sidecarAddr,
|
|
"max_concurrent": c.maxConcurrent,
|
|
"timeout_ms": int(c.timeout / time.Millisecond),
|
|
"total_requests": totalRequests,
|
|
"successful_reads": successfulReads,
|
|
"failed_reads": failedReads,
|
|
"success_rate_pct": fmt.Sprintf("%.1f", successRate),
|
|
"total_bytes_read": totalBytesRead,
|
|
"avg_latency_ns": avgLatencyNs,
|
|
"avg_latency_ms": fmt.Sprintf("%.3f", float64(avgLatencyNs)/1000000),
|
|
}
|
|
}
|
|
|
|
// Close shuts down the RDMA client and releases resources
|
|
func (c *RDMAMountClient) Close() error {
|
|
// No need to close semaphore channel; closing it may cause panics if goroutines are still using it.
|
|
// The semaphore will be garbage collected when the client is no longer referenced.
|
|
|
|
// Log final statistics
|
|
stats := c.GetStats()
|
|
glog.Infof("RDMA mount client closing: %+v", stats)
|
|
|
|
return nil
|
|
}
|
|
|
|
// IsHealthy checks if the RDMA sidecar is currently healthy
|
|
func (c *RDMAMountClient) IsHealthy() bool {
|
|
err := c.healthCheck()
|
|
return err == nil
|
|
}
|
|
|
|
// readFromTempFile performs zero-copy read from temp file using page cache
|
|
func (c *RDMAMountClient) readFromTempFile(tempFilePath string, buffer []byte) (int, error) {
|
|
if tempFilePath == "" {
|
|
return 0, fmt.Errorf("empty temp file path")
|
|
}
|
|
|
|
// Open temp file for reading
|
|
file, err := os.Open(tempFilePath)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("failed to open temp file %s: %w", tempFilePath, err)
|
|
}
|
|
defer file.Close()
|
|
|
|
// Read from temp file (this should be served from page cache)
|
|
n, err := file.Read(buffer)
|
|
if err != nil && err != io.EOF {
|
|
return n, fmt.Errorf("failed to read from temp file: %w", err)
|
|
}
|
|
|
|
glog.V(4).Infof("🔥 Zero-copy read: %d bytes from temp file %s", n, tempFilePath)
|
|
|
|
return n, nil
|
|
}
|