s3: fix remote object not caching (#7790)

* s3: fix remote object not caching

* s3: address review comments for remote object caching

- Fix leading slash in object name by using strings.TrimPrefix
- Return cached entry from CacheRemoteObjectToLocalCluster to get updated local chunk locations
- Reuse existing helper function instead of inline gRPC call

* s3/filer: add singleflight deduplication for remote object caching

- Add singleflight.Group to FilerServer to deduplicate concurrent cache operations
- Wrap CacheRemoteObjectToLocalCluster with singleflight to ensure only one
  caching operation runs per object when multiple clients request the same file
- Add early-return check for already-cached objects
- S3 API calls filer gRPC with timeout and graceful fallback on error
- Clear negative bucket cache when bucket is created via weed shell
- Add integration tests for remote cache with singleflight deduplication

This benefits all clients (S3, HTTP, Hadoop) accessing remote-mounted objects
by preventing redundant cache operations and improving concurrent access performance.

Fixes: https://github.com/seaweedfs/seaweedfs/discussions/7599

* fix: data race in concurrent remote object caching

- Add mutex to protect chunks slice from concurrent append
- Add mutex to protect fetchAndWriteErr from concurrent read/write
- Fix incorrect error check (was checking assignResult.Error instead of parseErr)
- Rename inner variable to avoid shadowing fetchAndWriteErr

* fix: address code review comments

- Remove duplicate remote caching block in GetObjectHandler, keep only singleflight version
- Add mutex protection for concurrent chunk slice and error access (data race fix)
- Use lazy initialization for S3 client in tests to avoid panic during package load
- Fix markdown linting: add language specifier to code fence, blank lines around tables
- Add 'all' target to Makefile as alias for test-with-server
- Remove unused 'util' import

* style: remove emojis from test files

* fix: add defensive checks and sort chunks by offset

- Add nil check and type assertion check for singleflight result
- Sort chunks by offset after concurrent fetching to maintain file order

* fix: improve test diagnostics and path normalization

- runWeedShell now returns error for better test diagnostics
- Add all targets to .PHONY in Makefile (logs-primary, logs-remote, health)
- Strip leading slash from normalizedObject to avoid double slashes in path

---------

Co-authored-by: chrislu <chris.lu@gmail.com>
Co-authored-by: Chris Lu <chrislusf@users.noreply.github.com>
This commit is contained in:
G-OD
2025-12-16 20:41:04 +00:00
committed by GitHub
parent 697b56003d
commit 504b258258
13 changed files with 992 additions and 36 deletions

View File

@@ -196,6 +196,9 @@ func (s3a *S3ApiServer) updateBucketConfigCacheFromEntry(entry *filer_pb.Entry)
// Update cache
glog.V(3).Infof("updateBucketConfigCacheFromEntry: updating cache for bucket %s, ObjectLockConfig=%+v", bucket, config.ObjectLockConfig)
s3a.bucketConfigCache.Set(bucket, config)
// Remove from negative cache since bucket now exists
// This is important for buckets created via weed shell or other external means
s3a.bucketConfigCache.RemoveNegativeCache(bucket)
}
// invalidateBucketConfigCache removes a bucket from the configuration cache

View File

@@ -3,14 +3,15 @@ package s3api
import (
"context"
"encoding/json"
"math"
"sync"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
"github.com/seaweedfs/seaweedfs/weed/util"
"math"
"sync"
)
var loadBucketMetadataFromFiler = func(r *BucketRegistry, bucketName string) (*BucketMetaData, error) {
@@ -85,8 +86,10 @@ func (r *BucketRegistry) init() error {
// LoadBucketMetadata builds the metadata for the given bucket entry, stores it
// in the registry's metadata cache, and clears any stale "not found" marker so
// the bucket is immediately visible (e.g. after creation via weed shell).
func (r *BucketRegistry) LoadBucketMetadata(entry *filer_pb.Entry) {
	bucketMetadata := buildBucketMetadata(r.s3a.iam, entry)
	r.metadataCacheLock.Lock()
	r.metadataCache[entry.Name] = bucketMetadata
	// Release before calling unMarkNotFound: the rendered block both deferred
	// and explicitly called Unlock (a double unlock panics at runtime), and
	// holding metadataCacheLock across unMarkNotFound risks lock-order issues
	// with the notFound cache's own lock.
	r.metadataCacheLock.Unlock()
	// Remove from notFound cache since bucket now exists
	r.unMarkNotFound(entry.Name)
}
func buildBucketMetadata(accountManager AccountManager, entry *filer_pb.Entry) *BucketMetaData {

View File

@@ -659,6 +659,13 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request)
return
}
// Handle remote storage objects: cache to local cluster if object is remote-only
// This uses singleflight to deduplicate concurrent caching requests for the same object
// On cache error, gracefully falls back to streaming from remote
if objectEntryForSSE.IsInRemoteOnly() {
objectEntryForSSE = s3a.cacheRemoteObjectWithDedup(r.Context(), bucket, object, objectEntryForSSE)
}
// Re-check bucket policy with object entry for tag-based conditions (e.g., s3:ExistingObjectTag)
if errCode := s3a.recheckPolicyWithObjectEntry(r, bucket, object, string(s3_constants.ACTION_READ), objectEntryForSSE.Extended, "GetObjectHandler"); errCode != s3err.ErrNone {
s3err.WriteErrorResponse(w, r, errCode)
@@ -3319,3 +3326,63 @@ func (s3a *S3ApiServer) getMultipartInfo(entry *filer_pb.Entry, partNumber int)
// No part boundaries metadata or part not found
return partsCount, nil
}
// cacheRemoteObjectWithDedup caches a remote-only object into the local cluster
// before serving it. Deduplication of concurrent cache requests happens on the
// filer side (singleflight), so every client type (S3, HTTP, Hadoop) benefits.
// On any caching failure the original entry is returned unchanged and the
// object is streamed from remote storage instead.
func (s3a *S3ApiServer) cacheRemoteObjectWithDedup(ctx context.Context, bucket, object string, entry *filer_pb.Entry) *filer_pb.Entry {
	// Bound how long we wait for caching so a request is never blocked
	// indefinitely; oversized objects simply fall back to remote streaming.
	const cacheTimeout = 30 * time.Second
	cacheCtx, cancel := context.WithTimeout(ctx, cacheTimeout)
	defer cancel()

	// Derive the parent directory and bare object name. Duplicate and leading
	// slashes are normalized away so the filer path never contains "//".
	parentDir := s3a.option.BucketsPath + "/" + bucket
	objectName := strings.TrimPrefix(removeDuplicateSlashes(object), "/")
	if slash := strings.LastIndex(objectName, "/"); slash > 0 {
		parentDir = parentDir + "/" + objectName[:slash]
		objectName = objectName[slash+1:]
	}

	// NOTE(review): assumes entry.RemoteEntry is non-nil — callers gate on
	// IsInRemoteOnly(); confirm that invariant if new call sites are added.
	glog.V(2).Infof("cacheRemoteObjectWithDedup: caching %s/%s (remote size: %d)", bucket, object, entry.RemoteEntry.RemoteSize)

	// Ask the filer to pull the object into the local cluster; the filer
	// collapses concurrent requests for the same object into one operation.
	var fetched *filer_pb.Entry
	err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		resp, rpcErr := client.CacheRemoteObjectToLocalCluster(cacheCtx, &filer_pb.CacheRemoteObjectToLocalClusterRequest{
			Directory: parentDir,
			Name:      objectName,
		})
		if rpcErr != nil {
			return rpcErr
		}
		if resp != nil && resp.Entry != nil {
			fetched = resp.Entry
		}
		return nil
	})
	if err != nil {
		// Caching failed — log and hand back the original entry; the filer
		// proxy can still stream the bytes straight from remote storage.
		if errors.Is(err, context.DeadlineExceeded) {
			glog.V(1).Infof("cacheRemoteObjectWithDedup: timeout caching %s/%s after %v (will stream from remote)", bucket, object, cacheTimeout)
		} else {
			glog.Warningf("cacheRemoteObjectWithDedup: failed to cache %s/%s: %v (will stream from remote)", bucket, object, err)
		}
		return entry
	}

	// Adopt the freshly cached local chunks while keeping the rest of the
	// original entry's metadata (attributes, extended headers) untouched.
	if fetched != nil && len(fetched.GetChunks()) > 0 {
		glog.V(1).Infof("cacheRemoteObjectWithDedup: successfully cached %s/%s (%d chunks)", bucket, object, len(fetched.GetChunks()))
		entry.Chunks = fetched.Chunks
	}
	return entry
}

View File

@@ -70,7 +70,7 @@ type S3ApiServer struct {
inFlightDataSize int64
inFlightUploads int64
inFlightDataLimitCond *sync.Cond
embeddedIam *EmbeddedIamApi // Embedded IAM API server (when enabled)
embeddedIam *EmbeddedIamApi // Embedded IAM API server (when enabled)
}
func NewS3ApiServer(router *mux.Router, option *S3ApiServerOption) (s3ApiServer *S3ApiServer, err error) {