* s3: fix remote object not caching * s3: address review comments for remote object caching - Fix leading slash in object name by using strings.TrimPrefix - Return cached entry from CacheRemoteObjectToLocalCluster to get updated local chunk locations - Reuse existing helper function instead of inline gRPC call * s3/filer: add singleflight deduplication for remote object caching - Add singleflight.Group to FilerServer to deduplicate concurrent cache operations - Wrap CacheRemoteObjectToLocalCluster with singleflight to ensure only one caching operation runs per object when multiple clients request the same file - Add early-return check for already-cached objects - S3 API calls filer gRPC with timeout and graceful fallback on error - Clear negative bucket cache when bucket is created via weed shell - Add integration tests for remote cache with singleflight deduplication This benefits all clients (S3, HTTP, Hadoop) accessing remote-mounted objects by preventing redundant cache operations and improving concurrent access performance. Fixes: https://github.com/seaweedfs/seaweedfs/discussions/7599 * fix: data race in concurrent remote object caching - Add mutex to protect chunks slice from concurrent append - Add mutex to protect fetchAndWriteErr from concurrent read/write - Fix incorrect error check (was checking assignResult.Error instead of parseErr) - Rename inner variable to avoid shadowing fetchAndWriteErr * fix: address code review comments - Remove duplicate remote caching block in GetObjectHandler, keep only singleflight version - Add mutex protection for concurrent chunk slice and error access (data race fix) - Use lazy initialization for S3 client in tests to avoid panic during package load - Fix markdown linting: add language specifier to code fence, blank lines around tables - Add 'all' target to Makefile as alias for test-with-server - Remove unused 'util' import * style: remove emojis from test files * fix: add defensive checks and sort chunks by offset - Add nil check and type assertion check for singleflight result - Sort chunks by offset after concurrent fetching to maintain file order * fix: improve test diagnostics and path normalization - runWeedShell now returns error for better test diagnostics - Add all targets to .PHONY in Makefile (logs-primary, logs-remote, health) - Strip leading slash from normalizedObject to avoid double slashes in path --------- Co-authored-by: chrislu <chris.lu@gmail.com> Co-authored-by: Chris Lu <chrislusf@users.noreply.github.com>
45 lines
1.9 KiB
Go
45 lines
1.9 KiB
Go
package filer
|
|
|
|
import (
|
|
"context"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
func (entry *Entry) IsInRemoteOnly() bool {
|
|
return len(entry.GetChunks()) == 0 && entry.Remote != nil && entry.Remote.RemoteSize > 0
|
|
}
|
|
|
|
func MapFullPathToRemoteStorageLocation(localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, fp util.FullPath) *remote_pb.RemoteStorageLocation {
|
|
remoteLocation := &remote_pb.RemoteStorageLocation{
|
|
Name: remoteMountedLocation.Name,
|
|
Bucket: remoteMountedLocation.Bucket,
|
|
Path: remoteMountedLocation.Path,
|
|
}
|
|
remoteLocation.Path = string(util.FullPath(remoteLocation.Path).Child(string(fp)[len(localMountedDir):]))
|
|
return remoteLocation
|
|
}
|
|
|
|
func MapRemoteStorageLocationPathToFullPath(localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, remoteLocationPath string) (fp util.FullPath) {
|
|
return localMountedDir.Child(remoteLocationPath[len(remoteMountedLocation.Path):])
|
|
}
|
|
|
|
// CacheRemoteObjectToLocalCluster caches a remote object to the local cluster.
|
|
// It returns the updated entry with local chunk locations.
|
|
// Parameters remoteConf and remoteLocation are kept for backward compatibility but are not used.
|
|
func CacheRemoteObjectToLocalCluster(filerClient filer_pb.FilerClient, remoteConf *remote_pb.RemoteConf, remoteLocation *remote_pb.RemoteStorageLocation, parent util.FullPath, entry *filer_pb.Entry) (*filer_pb.Entry, error) {
|
|
var cachedEntry *filer_pb.Entry
|
|
err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
resp, cacheErr := client.CacheRemoteObjectToLocalCluster(context.Background(), &filer_pb.CacheRemoteObjectToLocalClusterRequest{
|
|
Directory: string(parent),
|
|
Name: entry.Name,
|
|
})
|
|
if cacheErr == nil && resp != nil {
|
|
cachedEntry = resp.Entry
|
|
}
|
|
return cacheErr
|
|
})
|
|
return cachedEntry, err
|
|
}
|