s3: fix remote object not being cached (#7790)

* s3: fix remote object not being cached

* s3: address review comments for remote object caching

- Fix leading slash in object name by using strings.TrimPrefix
- Return cached entry from CacheRemoteObjectToLocalCluster to get updated local chunk locations
- Reuse existing helper function instead of inline gRPC call

* s3/filer: add singleflight deduplication for remote object caching

- Add singleflight.Group to FilerServer to deduplicate concurrent cache operations
- Wrap CacheRemoteObjectToLocalCluster with singleflight to ensure only one
  caching operation runs per object when multiple clients request the same file
- Add early-return check for already-cached objects
- S3 API calls filer gRPC with timeout and graceful fallback on error (see the sketch after this list)
- Clear negative bucket cache when bucket is created via weed shell
- Add integration tests for remote cache with singleflight deduplication

This benefits all clients (S3, HTTP, Hadoop) accessing remote-mounted objects
by preventing redundant cache operations and improving concurrent access performance.
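
The S3-side caller is not part of the diff below; roughly, it looks like the following sketch. The helper name ensureRemoteObjectCached and the 30-second timeout are illustrative assumptions, not the committed code; WithFilerClient and the CacheRemoteObjectToLocalCluster RPC are existing SeaweedFS APIs.

// Illustrative sketch, not the committed code: ask the filer to cache a
// remote-mounted object, and degrade gracefully if caching fails so the
// S3 read can still proceed against the remote store.
func (s3a *S3ApiServer) ensureRemoteObjectCached(dir, name string) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		_, err := client.CacheRemoteObjectToLocalCluster(ctx, &filer_pb.CacheRemoteObjectToLocalClusterRequest{
			Directory: dir,  // e.g. "/buckets/mybucket"
			Name:      name, // object name without a leading slash
		})
		return err
	})
	if err != nil {
		// Graceful fallback: log and let the read go through to the remote store.
		glog.V(1).Infof("cache remote object %s/%s: %v", dir, name, err)
	}
}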

Fixes: https://github.com/seaweedfs/seaweedfs/discussions/7599

* fix: data race in concurrent remote object caching

- Add mutex to protect chunks slice from concurrent append (minimal reproduction below)
- Add mutex to protect fetchAndWriteErr from concurrent read/write
- Fix incorrect error check (was checking assignResult.Error instead of parseErr)
- Rename inner variable to avoid shadowing fetchAndWriteErr
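
The race itself is the textbook unsynchronized append from multiple goroutines. A minimal standalone reproduction (not SeaweedFS code) that go run -race flags immediately:

package main

import "sync"

func main() {
	var chunks []int
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// DATA RACE: append reads and writes the shared slice header
			// from several goroutines with no synchronization.
			chunks = append(chunks, i)
		}(i)
	}
	wg.Wait()
}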

* fix: address code review comments

- Remove duplicate remote caching block in GetObjectHandler, keep only singleflight version
- Add mutex protection for concurrent chunk slice and error access (data race fix)
- Use lazy initialization for S3 client in tests to avoid panic during package load
- Fix markdown linting: add language specifier to code fence, blank lines around tables
- Add 'all' target to Makefile as alias for test-with-server
- Remove unused 'util' import

* style: remove emojis from test files

* fix: add defensive checks and sort chunks by offset

- Add nil check and type assertion check for singleflight result
- Sort chunks by offset after concurrent fetching to maintain file order

* fix: improve test diagnostics and path normalization

- runWeedShell now returns an error for better test diagnostics
- Add all targets to .PHONY in Makefile (logs-primary, logs-remote, health)
- Strip leading slash from normalizedObject to avoid double slashes in path
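
The normalization in the last bullet amounts to something like this sketch (bucketDir and object are illustrative names):

// Without TrimPrefix, joining "/buckets/mybucket" with "/photo.jpg" would
// produce "/buckets/mybucket//photo.jpg", breaking lookups and cache keys.
normalizedObject := strings.TrimPrefix(object, "/")
path := bucketDir + "/" + normalizedObject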

---------

Co-authored-by: chrislu <chris.lu@gmail.com>
Co-authored-by: Chris Lu <chrislusf@users.noreply.github.com>
G-OD authored on 2025-12-16 20:41:04 +00:00, committed by GitHub
parent 697b56003d · commit 504b258258
13 changed files with 992 additions and 36 deletions


@@ -3,11 +3,13 @@ package weed_server
 import (
 	"context"
 	"fmt"
+	"sort"
 	"strings"
 	"sync"
 	"time"

 	"github.com/seaweedfs/seaweedfs/weed/filer"
+	"github.com/seaweedfs/seaweedfs/weed/glog"
 	"github.com/seaweedfs/seaweedfs/weed/operation"
 	"github.com/seaweedfs/seaweedfs/weed/pb"
 	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@@ -19,6 +21,59 @@ import (
 )

 func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req *filer_pb.CacheRemoteObjectToLocalClusterRequest) (*filer_pb.CacheRemoteObjectToLocalClusterResponse, error) {
+	// Use singleflight to deduplicate concurrent caching requests for the same object
+	// This benefits all clients: S3 API, filer HTTP, Hadoop, etc.
+	cacheKey := req.Directory + "/" + req.Name
+	result, err, shared := fs.remoteCacheGroup.Do(cacheKey, func() (interface{}, error) {
+		return fs.doCacheRemoteObjectToLocalCluster(ctx, req)
+	})
+	if shared {
+		glog.V(2).Infof("CacheRemoteObjectToLocalCluster: shared result for %s", cacheKey)
+	}
+	if err != nil {
+		return nil, err
+	}
+	if result == nil {
+		return nil, fmt.Errorf("unexpected nil result from singleflight")
+	}
+	resp, ok := result.(*filer_pb.CacheRemoteObjectToLocalClusterResponse)
+	if !ok {
+		return nil, fmt.Errorf("unexpected result type from singleflight")
+	}
+	return resp, nil
+}
+
+// doCacheRemoteObjectToLocalCluster performs the actual caching operation.
+// This is called from singleflight, so only one instance runs per object.
+func (fs *FilerServer) doCacheRemoteObjectToLocalCluster(ctx context.Context, req *filer_pb.CacheRemoteObjectToLocalClusterRequest) (*filer_pb.CacheRemoteObjectToLocalClusterResponse, error) {
+	// find the entry first to check if already cached
+	entry, err := fs.filer.FindEntry(ctx, util.JoinPath(req.Directory, req.Name))
+	if err == filer_pb.ErrNotFound {
+		return nil, err
+	}
+	if err != nil {
+		return nil, fmt.Errorf("find entry %s/%s: %v", req.Directory, req.Name, err)
+	}
+
+	resp := &filer_pb.CacheRemoteObjectToLocalClusterResponse{}
+
+	// Early return if not a remote-only object or already cached
+	if entry.Remote == nil || entry.Remote.RemoteSize == 0 {
+		resp.Entry = entry.ToProtoEntry()
+		return resp, nil
+	}
+	if len(entry.GetChunks()) > 0 {
+		// Already has local chunks - already cached
+		glog.V(2).Infof("CacheRemoteObjectToLocalCluster: %s/%s already cached (%d chunks)", req.Directory, req.Name, len(entry.GetChunks()))
+		resp.Entry = entry.ToProtoEntry()
+		return resp, nil
+	}
+
+	glog.V(1).Infof("CacheRemoteObjectToLocalCluster: caching %s/%s (remote size: %d)", req.Directory, req.Name, entry.Remote.RemoteSize)
+
 	// load all mappings
 	mappingEntry, err := fs.filer.FindEntry(ctx, util.JoinPath(filer.DirectoryEtcRemote, filer.REMOTE_STORAGE_MOUNT_FILE))
@@ -52,17 +107,6 @@ func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req
return nil, fmt.Errorf("unmarshal remote storage conf %s/%s: %v", filer.DirectoryEtcRemote, remoteStorageMountedLocation.Name+filer.REMOTE_STORAGE_CONF_SUFFIX, unMarshalErr)
}
// find the entry
entry, err := fs.filer.FindEntry(ctx, util.JoinPath(req.Directory, req.Name))
if err == filer_pb.ErrNotFound {
return nil, err
}
resp := &filer_pb.CacheRemoteObjectToLocalClusterResponse{}
if entry.Remote == nil || entry.Remote.RemoteSize == 0 {
return resp, nil
}
// detect storage option
so, err := fs.detectStorageOption(ctx, req.Directory, "", "", 0, "", "", "", "")
if err != nil {
@@ -81,6 +125,7 @@ func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req
 	dest := util.FullPath(remoteStorageMountedLocation.Path).Child(string(util.FullPath(req.Directory).Child(req.Name))[len(localMountedDir):])

 	var chunks []*filer_pb.FileChunk
+	var chunksMu sync.Mutex
 	var fetchAndWriteErr error
 	var wg sync.WaitGroup
@@ -99,16 +144,28 @@ func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req
 			// assign one volume server
 			assignResult, err := operation.Assign(ctx, fs.filer.GetMaster, fs.grpcDialOption, assignRequest, altRequest)
 			if err != nil {
-				fetchAndWriteErr = err
+				chunksMu.Lock()
+				if fetchAndWriteErr == nil {
+					fetchAndWriteErr = err
+				}
+				chunksMu.Unlock()
 				return
 			}
 			if assignResult.Error != "" {
-				fetchAndWriteErr = fmt.Errorf("assign: %v", assignResult.Error)
+				chunksMu.Lock()
+				if fetchAndWriteErr == nil {
+					fetchAndWriteErr = fmt.Errorf("assign: %v", assignResult.Error)
+				}
+				chunksMu.Unlock()
 				return
 			}
 			fileId, parseErr := needle.ParseFileIdFromString(assignResult.Fid)
-			if assignResult.Error != "" {
-				fetchAndWriteErr = fmt.Errorf("unrecognized file id %s: %v", assignResult.Fid, parseErr)
+			if parseErr != nil {
+				chunksMu.Lock()
+				if fetchAndWriteErr == nil {
+					fetchAndWriteErr = fmt.Errorf("unrecognized file id %s: %v", assignResult.Fid, parseErr)
+				}
+				chunksMu.Unlock()
 				return
 			}
@@ -125,7 +182,7 @@ func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req
 			assignedServerAddress := pb.NewServerAddressWithGrpcPort(assignResult.Url, assignResult.GrpcPort)

 			var etag string
 			err = operation.WithVolumeServerClient(false, assignedServerAddress, fs.grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
-				resp, fetchAndWriteErr := volumeServerClient.FetchAndWriteNeedle(context.Background(), &volume_server_pb.FetchAndWriteNeedleRequest{
+				resp, fetchErr := volumeServerClient.FetchAndWriteNeedle(context.Background(), &volume_server_pb.FetchAndWriteNeedleRequest{
 					VolumeId: uint32(fileId.VolumeId),
 					NeedleId: uint64(fileId.Key),
 					Cookie:   uint32(fileId.Cookie),
@@ -140,21 +197,23 @@ func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req
 						Path: string(dest),
 					},
 				})
-				if fetchAndWriteErr != nil {
-					return fmt.Errorf("volume server %s fetchAndWrite %s: %v", assignResult.Url, dest, fetchAndWriteErr)
-				} else {
-					etag = resp.ETag
+				if fetchErr != nil {
+					return fmt.Errorf("volume server %s fetchAndWrite %s: %v", assignResult.Url, dest, fetchErr)
 				}
+				etag = resp.ETag
 				return nil
 			})
-			if err != nil && fetchAndWriteErr == nil {
-				fetchAndWriteErr = err
+			if err != nil {
+				chunksMu.Lock()
+				if fetchAndWriteErr == nil {
+					fetchAndWriteErr = err
+				}
+				chunksMu.Unlock()
+				return
 			}

-			chunks = append(chunks, &filer_pb.FileChunk{
+			chunk := &filer_pb.FileChunk{
 				FileId: assignResult.Fid,
 				Offset: localOffset,
 				Size:   uint64(size),
@@ -165,13 +224,24 @@ func (fs *FilerServer) CacheRemoteObjectToLocalCluster(ctx context.Context, req
 					FileKey: uint64(fileId.Key),
 					Cookie:  uint32(fileId.Cookie),
 				},
-			})
+			}
+			chunksMu.Lock()
+			chunks = append(chunks, chunk)
+			chunksMu.Unlock()
 		})
 	}
 	wg.Wait()

-	if fetchAndWriteErr != nil {
-		return nil, fetchAndWriteErr
+	chunksMu.Lock()
+	err = fetchAndWriteErr
+	// Sort chunks by offset to maintain file order
+	sort.Slice(chunks, func(i, j int) bool {
+		return chunks[i].Offset < chunks[j].Offset
+	})
+	chunksMu.Unlock()
+	if err != nil {
+		return nil, err
 	}

 	garbage := entry.GetChunks()


@@ -11,6 +11,7 @@ import (
"time"
"github.com/seaweedfs/seaweedfs/weed/stats"
"golang.org/x/sync/singleflight"
"google.golang.org/grpc"
@@ -108,6 +109,9 @@ type FilerServer struct {
 	// track known metadata listeners
 	knownListenersLock sync.Mutex
 	knownListeners     map[int32]int32
+
+	// deduplicates concurrent remote object caching operations
+	remoteCacheGroup singleflight.Group
 }

 func NewFilerServer(defaultMux, readonlyMux *http.ServeMux, option *FilerOption) (fs *FilerServer, err error) {
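
For readers unfamiliar with golang.org/x/sync/singleflight: a minimal, self-contained sketch (not SeaweedFS code) of the deduplication the new remoteCacheGroup field provides. Concurrent Do calls with the same key share a single execution of the function, and the shared flag — which the filer logs at V(2) above — reports when a result was handed to more than one caller.

package main

import (
	"fmt"
	"sync"
	"time"

	"golang.org/x/sync/singleflight"
)

func main() {
	var g singleflight.Group
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// All three callers coalesce onto one execution of the function.
			v, _, shared := g.Do("bucket/object", func() (interface{}, error) {
				time.Sleep(100 * time.Millisecond) // stand-in for the expensive cache fill
				return "cached", nil
			})
			fmt.Printf("caller %d got %v (shared=%v)\n", i, v, shared)
		}(i)
	}
	wg.Wait()
}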