* improve large file sync throughput for remote.cache and filer.sync
Three main throughput improvements:
1. Adaptive chunk sizing for remote.cache: targets ~32 chunks per file
instead of always starting at 5MB. A 500MB file now uses ~16MB chunks
(32 chunks) instead of 5MB chunks (100 chunks), reducing per-chunk
overhead (volume assign, gRPC call, needle write) by 3x.
2. Configurable concurrency at every layer:
- remote.cache chunk concurrency: -chunkConcurrency flag (default 8)
- remote.cache S3 download concurrency: -downloadConcurrency flag
(default raised from 1 to 5 per chunk)
- filer.sync chunk concurrency: -chunkConcurrency flag (default 32)
3. S3 multipart download concurrency raised from 1 to 5: the S3 manager
downloader was using Concurrency=1, serializing all part downloads
within each chunk. This alone can speed up per-chunk downloads by up to 5x.
The concurrency values flow through the gRPC request chain:
shell command → CacheRemoteObjectToLocalClusterRequest →
FetchAndWriteNeedleRequest → S3 downloader
Zero values in the request mean "use server defaults", maintaining
full backward compatibility with existing callers.
Ref #8481
* fix: use full maxMB for chunk size cap and remove loop guard
Address review feedback:
- Use full maxMB instead of maxMB/2 for maxChunkSize to avoid
unnecessarily limiting chunk size for very large files.
- Remove chunkSize < maxChunkSize guard from the safety loop so it
can always grow past maxChunkSize when needed to stay under 1000
chunks (e.g., extremely large files with small maxMB).
* address review feedback: help text, validation, naming, docs
- Fix help text for -chunkConcurrency and -downloadConcurrency flags
to say "0 = server default" instead of advertising specific numeric
defaults that could drift from the server implementation.
- Validate chunkConcurrency and downloadConcurrency are within int32
range before narrowing, returning a user-facing error if out of range.
- Rename ReadRemoteErr to readRemoteErr to follow Go naming conventions.
- Add doc comment to SetChunkConcurrency noting it must be called
during initialization before replication goroutines start.
- Replace doubling loop in chunk size safety check with direct
ceil(remoteSize/1000) computation to guarantee the 1000-chunk cap.
* address Copilot review: clamp concurrency, fix chunk count, clarify proto docs
- Use ceiling division for chunk count check to avoid overcounting
when file size is an exact multiple of chunk size.
- Clamp chunkConcurrency (max 1024) and downloadConcurrency (max 1024
at filer, max 64 at volume server) to prevent excessive goroutines.
- Always use ReadFileWithConcurrency when the client supports it,
falling back to the implementation's default when value is 0.
- Clarify proto comments that download_concurrency only applies when
the remote storage client supports it (currently S3).
- Include specific server defaults in help text (e.g., "0 = server
default 8") so users see the actual values in -h output.
* fix data race on executionErr and use %w for error wrapping
- Protect concurrent writes to executionErr in remote.cache worker
goroutines with a sync.Mutex to eliminate the data race.
- Use %w instead of %v in volume_grpc_remote.go error formatting
to preserve the error chain for errors.Is/errors.As callers.
108 lines
3.4 KiB
Go
108 lines
3.4 KiB
Go
package weed_server
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/operation"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
)
|
|
|
|
func (vs *VolumeServer) FetchAndWriteNeedle(ctx context.Context, req *volume_server_pb.FetchAndWriteNeedleRequest) (resp *volume_server_pb.FetchAndWriteNeedleResponse, err error) {
|
|
if err := vs.CheckMaintenanceMode(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp = &volume_server_pb.FetchAndWriteNeedleResponse{}
|
|
v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
|
|
if v == nil {
|
|
return nil, fmt.Errorf("not found volume id %d", req.VolumeId)
|
|
}
|
|
|
|
remoteConf := req.RemoteConf
|
|
|
|
client, getClientErr := remote_storage.GetRemoteStorage(remoteConf)
|
|
if getClientErr != nil {
|
|
return nil, fmt.Errorf("get remote client: %w", getClientErr)
|
|
}
|
|
|
|
remoteStorageLocation := req.RemoteLocation
|
|
|
|
var data []byte
|
|
var readRemoteErr error
|
|
if cr, ok := client.(remote_storage.RemoteStorageConcurrentReader); ok {
|
|
concurrency := int(req.DownloadConcurrency)
|
|
if concurrency <= 0 {
|
|
concurrency = 0 // let the implementation choose its default
|
|
} else if concurrency > 64 {
|
|
concurrency = 64
|
|
}
|
|
data, readRemoteErr = cr.ReadFileWithConcurrency(remoteStorageLocation, req.Offset, req.Size, concurrency)
|
|
} else {
|
|
data, readRemoteErr = client.ReadFile(remoteStorageLocation, req.Offset, req.Size)
|
|
}
|
|
if readRemoteErr != nil {
|
|
return nil, fmt.Errorf("read from remote %+v: %w", remoteStorageLocation, readRemoteErr)
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
n := new(needle.Needle)
|
|
n.Id = types.NeedleId(req.NeedleId)
|
|
n.Cookie = types.Cookie(req.Cookie)
|
|
n.Data, n.DataSize = data, uint32(len(data))
|
|
// copied from *Needle.prepareWriteBuffer()
|
|
n.Size = 4 + types.Size(n.DataSize) + 1
|
|
n.Checksum = needle.NewCRC(n.Data)
|
|
n.LastModified = uint64(time.Now().Unix())
|
|
n.SetHasLastModifiedDate()
|
|
if _, localWriteErr := vs.store.WriteVolumeNeedle(v.Id, n, true, false); localWriteErr != nil {
|
|
if err == nil {
|
|
err = fmt.Errorf("local write needle %d size %d: %v", req.NeedleId, req.Size, localWriteErr)
|
|
}
|
|
} else {
|
|
resp.ETag = n.Etag()
|
|
}
|
|
}()
|
|
if len(req.Replicas) > 0 {
|
|
fileId := needle.NewFileId(v.Id, req.NeedleId, req.Cookie)
|
|
for _, replica := range req.Replicas {
|
|
wg.Add(1)
|
|
go func(targetVolumeServer string) {
|
|
defer wg.Done()
|
|
uploadOption := &operation.UploadOption{
|
|
UploadUrl: fmt.Sprintf("http://%s/%s?type=replicate", targetVolumeServer, fileId.String()),
|
|
Filename: "",
|
|
Cipher: false,
|
|
IsInputCompressed: false,
|
|
MimeType: "",
|
|
PairMap: nil,
|
|
Jwt: security.EncodedJwt(req.Auth),
|
|
}
|
|
|
|
uploader, uploaderErr := operation.NewUploader()
|
|
if uploaderErr != nil && err == nil {
|
|
err = fmt.Errorf("remote write needle %d size %d: %v", req.NeedleId, req.Size, uploaderErr)
|
|
return
|
|
}
|
|
|
|
if _, replicaWriteErr := uploader.UploadData(ctx, data, uploadOption); replicaWriteErr != nil && err == nil {
|
|
err = fmt.Errorf("remote write needle %d size %d: %v", req.NeedleId, req.Size, replicaWriteErr)
|
|
}
|
|
}(replica.Url)
|
|
}
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
return resp, err
|
|
}
|