feat(filer): add lazy directory listing for remote mounts (#8615)
* feat(filer): add lazy directory listing for remote mounts Directory listings on remote mounts previously only queried the local filer store. With lazy mounts the listing was empty; with eager mounts it went stale over time. Add on-demand directory listing that fetches from remote and caches results with a 5-minute TTL: - Add `ListDirectory` to `RemoteStorageClient` interface (delimiter-based, single-level listing, separate from recursive `Traverse`) - Implement in S3, GCS, and Azure backends using each platform's hierarchical listing API - Add `maybeLazyListFromRemote` to filer: before each directory listing, check if the directory is under a remote mount with an expired cache, fetch from remote, persist entries to the local store, then let existing listing logic run on the populated store - Use singleflight to deduplicate concurrent requests for the same directory - Skip local-only entries (no RemoteEntry) to avoid overwriting unsynced uploads - Errors are logged and swallowed (availability over consistency) * refactor: extract xattr key to constant xattrRemoteListingSyncedAt * feat: make listing cache TTL configurable per mount via listing_cache_ttl_seconds Add listing_cache_ttl_seconds field to RemoteStorageLocation protobuf. When 0 (default), lazy directory listing is disabled for that mount. When >0, enables on-demand directory listing with the specified TTL. Expose as -listingCacheTTL flag on remote.mount command. * refactor: address review feedback for lazy directory listing - Add context.Context to ListDirectory interface and all implementations - Capture startTime before remote call for accurate TTL tracking - Simplify S3 ListDirectory using ListObjectsV2PagesWithContext - Make maybeLazyListFromRemote return void (errors always swallowed) - Remove redundant trailing-slash path manipulation in caller - Update tests to match new signatures * When an existing entry has Remote != nil, we should merge remote metadata into it rather than replacing it. 
* fix(gcs): wrap ListDirectory iterator error with context The raw iterator error was returned without bucket/path context, making it harder to debug. Wrap it consistently with the S3 pattern. * fix(s3): guard against nil pointer dereference in Traverse and ListDirectory Some S3-compatible backends may return nil for LastModified, Size, or ETag fields. Check for nil before dereferencing to prevent panics. * fix(filer): remove blanket 2-minute timeout from lazy listing context Individual SDK operations (S3, GCS, Azure) already have per-request timeouts and retry policies. The blanket timeout could cut off large directory listings mid-operation even though individual pages were succeeding. * fix(filer): preserve trace context in lazy listing with WithoutCancel Use context.WithoutCancel(ctx) instead of context.Background() so trace/span values from the incoming request are retained for distributed tracing, while still decoupling cancellation. * fix(filer): use Store.FindEntry for internal lookups, add Uid/Gid to files, fix updateDirectoryListingSyncedAt - Use f.Store.FindEntry instead of f.FindEntry for staleness check and child lookups to avoid unnecessary lazy-fetch overhead - Set OS_UID/OS_GID on new file entries for consistency with directories - In updateDirectoryListingSyncedAt, use Store.UpdateEntry for existing directories instead of CreateEntry to avoid deleteChunksIfNotNew and NotifyUpdateEvent side effects * fix(filer): distinguish not-found from store errors in lazy listing Previously, any error from Store.FindEntry was treated as "not found," which could cause entry recreation/overwrite on transient DB failures. Now check for filer_pb.ErrNotFound explicitly and skip entries or bail out on real store errors. * refactor(filer): use errors.Is for ErrNotFound comparisons
This commit is contained in:
@@ -56,6 +56,7 @@ type Filer struct {
|
||||
FilerConf *FilerConf
|
||||
RemoteStorage *FilerRemoteStorage
|
||||
lazyFetchGroup singleflight.Group
|
||||
lazyListGroup singleflight.Group
|
||||
Dlm *lock_manager.DistributedLockManager
|
||||
MaxFilenameLength uint32
|
||||
deletionQuit chan struct{}
|
||||
@@ -389,6 +390,8 @@ func (f *Filer) FindEntry(ctx context.Context, p util.FullPath) (entry *Entry, e
|
||||
}
|
||||
|
||||
func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (expiredCount int64, lastFileName string, err error) {
|
||||
f.maybeLazyListFromRemote(ctx, p)
|
||||
|
||||
// Collect expired entries during iteration to avoid deadlock with DB connection pool
|
||||
var expiredEntries []*Entry
|
||||
var s3ExpiredEntries []*Entry
|
||||
|
||||
208
weed/filer/filer_lazy_remote_listing.go
Normal file
208
weed/filer/filer_lazy_remote_listing.go
Normal file
@@ -0,0 +1,208 @@
|
||||
package filer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||
)
|
||||
|
||||
// xattrRemoteListingSyncedAt is the key in a directory entry's Extended map
// under which the time of the last remote listing sync is stored, formatted as
// decimal Unix seconds.
const xattrRemoteListingSyncedAt = "remote.listing.synced_at"
|
||||
|
||||
// lazyListContextKey is the context key used to mark a context as already
// being inside a lazy remote listing; maybeLazyListFromRemote returns
// immediately when a value is present under this key.
type lazyListContextKey struct{}
|
||||
|
||||
// maybeLazyListFromRemote populates the local filer store with entries from the
|
||||
// remote storage backend for directory p if the following conditions hold:
|
||||
// - p is under a remote mount with listing_cache_ttl_seconds > 0
|
||||
// - the cached listing has expired (based on the per-mount TTL)
|
||||
//
|
||||
// When listing_cache_ttl_seconds is 0 (the default), lazy listing is disabled
|
||||
// for that mount.
|
||||
//
|
||||
// On success it updates the directory's xattrRemoteListingSyncedAt extended
|
||||
// attribute so subsequent calls within the TTL window are no-ops.
|
||||
//
|
||||
// Errors are logged and swallowed (availability over consistency).
|
||||
func (f *Filer) maybeLazyListFromRemote(ctx context.Context, p util.FullPath) {
|
||||
// Prevent recursion: CreateEntry → FindEntry → doListDirectoryEntries → here
|
||||
if ctx.Value(lazyListContextKey{}) != nil {
|
||||
return
|
||||
}
|
||||
// Also respect the lazy-fetch guard to prevent mutual recursion
|
||||
if ctx.Value(lazyFetchContextKey{}) != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if f.RemoteStorage == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// The ptrie stores mount rules with trailing "/". When p is exactly the
|
||||
// mount directory (e.g. "/buckets/mybucket"), we must also try matching
|
||||
// with a trailing "/" so the trie recognizes the mount root.
|
||||
lookupPath := p
|
||||
mountDir, remoteLoc := f.RemoteStorage.FindMountDirectory(lookupPath)
|
||||
if remoteLoc == nil {
|
||||
lookupPath = util.FullPath(string(p) + "/")
|
||||
mountDir, remoteLoc = f.RemoteStorage.FindMountDirectory(lookupPath)
|
||||
if remoteLoc == nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Lazy listing is opt-in: disabled when TTL is 0
|
||||
if remoteLoc.ListingCacheTtlSeconds <= 0 {
|
||||
return
|
||||
}
|
||||
cacheTTL := time.Duration(remoteLoc.ListingCacheTtlSeconds) * time.Second
|
||||
|
||||
// Check staleness: read the directory entry's extended attributes.
|
||||
// Use Store.FindEntry directly — we only need the local xattr, not lazy-fetch.
|
||||
dirEntry, _ := f.Store.FindEntry(ctx, p)
|
||||
if dirEntry != nil {
|
||||
if syncedAtStr, ok := dirEntry.Extended[xattrRemoteListingSyncedAt]; ok {
|
||||
if syncedAt, err := strconv.ParseInt(string(syncedAtStr), 10, 64); err == nil {
|
||||
if time.Since(time.Unix(syncedAt, 0)) < cacheTTL {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
client, _, found := f.RemoteStorage.FindRemoteStorageClient(lookupPath)
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
|
||||
key := "list:" + string(p)
|
||||
f.lazyListGroup.Do(key, func() (interface{}, error) {
|
||||
startTime := time.Now()
|
||||
objectLoc := MapFullPathToRemoteStorageLocation(mountDir, remoteLoc, p)
|
||||
|
||||
// Decouple from the caller's cancellation/deadline while preserving
|
||||
// trace/span values for distributed tracing.
|
||||
persistCtx := context.WithValue(context.WithoutCancel(ctx), lazyListContextKey{}, true)
|
||||
persistCtx = context.WithValue(persistCtx, lazyFetchContextKey{}, true)
|
||||
|
||||
listErr := client.ListDirectory(persistCtx, objectLoc, func(dir string, name string, isDirectory bool, remoteEntry *filer_pb.RemoteEntry) error {
|
||||
childPath := p.Child(name)
|
||||
|
||||
existingEntry, findErr := f.Store.FindEntry(persistCtx, childPath)
|
||||
if findErr != nil && !errors.Is(findErr, filer_pb.ErrNotFound) {
|
||||
glog.Warningf("maybeLazyListFromRemote: find %s: %v", childPath, findErr)
|
||||
return nil // skip this entry on transient store error
|
||||
}
|
||||
|
||||
// Skip entries that exist locally without a RemoteEntry (local-only uploads)
|
||||
if existingEntry != nil && existingEntry.Remote == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if existingEntry != nil {
|
||||
// Merge: update remote metadata while preserving local state
|
||||
// (Chunks, Extended, Uid/Gid/Mode, etc.)
|
||||
existingEntry.Remote = remoteEntry
|
||||
if !isDirectory && remoteEntry != nil {
|
||||
if remoteEntry.RemoteMtime > 0 {
|
||||
existingEntry.Attr.Mtime = time.Unix(remoteEntry.RemoteMtime, 0)
|
||||
}
|
||||
existingEntry.Attr.FileSize = uint64(remoteEntry.RemoteSize)
|
||||
}
|
||||
if saveErr := f.Store.UpdateEntry(persistCtx, existingEntry); saveErr != nil {
|
||||
glog.Warningf("maybeLazyListFromRemote: update %s: %v", childPath, saveErr)
|
||||
}
|
||||
} else {
|
||||
// New entry not yet in local store
|
||||
var entry *Entry
|
||||
if isDirectory {
|
||||
now := time.Now()
|
||||
entry = &Entry{
|
||||
FullPath: childPath,
|
||||
Attr: Attr{
|
||||
Mtime: now,
|
||||
Crtime: now,
|
||||
Mode: os.ModeDir | 0755,
|
||||
Uid: OS_UID,
|
||||
Gid: OS_GID,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
mtime := time.Now()
|
||||
if remoteEntry != nil && remoteEntry.RemoteMtime > 0 {
|
||||
mtime = time.Unix(remoteEntry.RemoteMtime, 0)
|
||||
}
|
||||
entry = &Entry{
|
||||
FullPath: childPath,
|
||||
Attr: Attr{
|
||||
Mtime: mtime,
|
||||
Crtime: mtime,
|
||||
Mode: 0644,
|
||||
Uid: OS_UID,
|
||||
Gid: OS_GID,
|
||||
},
|
||||
Remote: remoteEntry,
|
||||
}
|
||||
if remoteEntry != nil {
|
||||
entry.Attr.FileSize = uint64(remoteEntry.RemoteSize)
|
||||
}
|
||||
}
|
||||
if saveErr := f.CreateEntry(persistCtx, entry, false, false, nil, true, f.MaxFilenameLength); saveErr != nil {
|
||||
glog.Warningf("maybeLazyListFromRemote: persist %s: %v", childPath, saveErr)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if listErr != nil {
|
||||
glog.Warningf("maybeLazyListFromRemote: list %s: %v", p, listErr)
|
||||
return nil, nil // swallow error
|
||||
}
|
||||
|
||||
// Update the synced_at timestamp on the directory entry
|
||||
f.updateDirectoryListingSyncedAt(persistCtx, p, startTime)
|
||||
|
||||
return nil, nil
|
||||
})
|
||||
}
|
||||
|
||||
func (f *Filer) updateDirectoryListingSyncedAt(ctx context.Context, p util.FullPath, syncTime time.Time) {
|
||||
dirEntry, findErr := f.Store.FindEntry(ctx, p)
|
||||
if findErr != nil && !errors.Is(findErr, filer_pb.ErrNotFound) {
|
||||
glog.Warningf("maybeLazyListFromRemote: find dir %s: %v", p, findErr)
|
||||
return
|
||||
}
|
||||
if errors.Is(findErr, filer_pb.ErrNotFound) {
|
||||
// Directory doesn't exist yet, create it
|
||||
now := time.Now()
|
||||
dirEntry = &Entry{
|
||||
FullPath: p,
|
||||
Attr: Attr{
|
||||
Mtime: now,
|
||||
Crtime: now,
|
||||
Mode: os.ModeDir | 0755,
|
||||
Uid: OS_UID,
|
||||
Gid: OS_GID,
|
||||
},
|
||||
}
|
||||
if dirEntry.Extended == nil {
|
||||
dirEntry.Extended = make(map[string][]byte)
|
||||
}
|
||||
dirEntry.Extended[xattrRemoteListingSyncedAt] = []byte(fmt.Sprintf("%d", syncTime.Unix()))
|
||||
if saveErr := f.CreateEntry(ctx, dirEntry, false, false, nil, true, f.MaxFilenameLength); saveErr != nil {
|
||||
glog.Warningf("maybeLazyListFromRemote: create dir synced_at for %s: %v", p, saveErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
if dirEntry.Extended == nil {
|
||||
dirEntry.Extended = make(map[string][]byte)
|
||||
}
|
||||
dirEntry.Extended[xattrRemoteListingSyncedAt] = []byte(fmt.Sprintf("%d", syncTime.Unix()))
|
||||
if saveErr := f.Store.UpdateEntry(ctx, dirEntry); saveErr != nil {
|
||||
glog.Warningf("maybeLazyListFromRemote: update synced_at for %s: %v", p, saveErr)
|
||||
}
|
||||
}
|
||||
@@ -199,6 +199,9 @@ type stubRemoteClient struct {
|
||||
|
||||
deleteCalls []*remote_pb.RemoteStorageLocation
|
||||
removeCalls []*remote_pb.RemoteStorageLocation
|
||||
|
||||
listDirFn func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error
|
||||
listDirCalls int
|
||||
}
|
||||
|
||||
func (c *stubRemoteClient) StatFile(*remote_pb.RemoteStorageLocation) (*filer_pb.RemoteEntry, error) {
|
||||
@@ -235,6 +238,13 @@ func (c *stubRemoteClient) DeleteFile(loc *remote_pb.RemoteStorageLocation) erro
|
||||
})
|
||||
return c.deleteErr
|
||||
}
|
||||
func (c *stubRemoteClient) ListDirectory(_ context.Context, loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
c.listDirCalls++
|
||||
if c.listDirFn != nil {
|
||||
return c.listDirFn(loc, visitFn)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (c *stubRemoteClient) ListBuckets() ([]*remote_storage.Bucket, error) { return nil, nil }
|
||||
func (c *stubRemoteClient) CreateBucket(string) error { return nil }
|
||||
func (c *stubRemoteClient) DeleteBucket(string) error { return nil }
|
||||
@@ -828,3 +838,264 @@ func TestDeleteEntryMetaAndData_RecursiveFolderDeleteRemotesChildren(t *testing.
|
||||
require.Len(t, stub.removeCalls, 1)
|
||||
assert.Equal(t, "/subdir", stub.removeCalls[0].Path)
|
||||
}
|
||||
|
||||
// --- lazy listing tests ---
|
||||
|
||||
func TestMaybeLazyListFromRemote_PopulatesStoreFromRemote(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_populate"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
if err := visitFn("/", "subdir", true, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := visitFn("/", "file.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000,
|
||||
RemoteSize: 42,
|
||||
RemoteETag: "abc",
|
||||
StorageName: "myliststore",
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "myliststore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "myliststore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls)
|
||||
|
||||
// Check that the file was persisted
|
||||
fileEntry := store.getEntry("/buckets/mybucket/file.txt")
|
||||
require.NotNil(t, fileEntry, "file.txt should be persisted")
|
||||
assert.Equal(t, uint64(42), fileEntry.FileSize)
|
||||
assert.NotNil(t, fileEntry.Remote)
|
||||
|
||||
// Check that the subdirectory was persisted
|
||||
dirEntry := store.getEntry("/buckets/mybucket/subdir")
|
||||
require.NotNil(t, dirEntry, "subdir should be persisted")
|
||||
assert.True(t, dirEntry.IsDirectory())
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_DisabledWhenTTLZero(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_disabled"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
return visitFn("/", "file.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000, RemoteSize: 10,
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "disabledstore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "disabledstore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
// ListingCacheTtlSeconds defaults to 0 → disabled
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 0, stub.listDirCalls, "should not call remote when TTL is 0")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_TTLCachePreventsSecondCall(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_ttl"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
return visitFn("/", "file.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000, RemoteSize: 10,
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "ttlstore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "ttlstore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
// First call should hit remote
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls)
|
||||
|
||||
// Second call within TTL should be a no-op
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls, "should not call remote again within TTL")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_NotUnderMount(t *testing.T) {
|
||||
rs := NewFilerRemoteStorage()
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/not/a/mount"))
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_SkipsLocalOnlyEntries(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_skiplocal"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
// Remote has a file called "local.txt" too
|
||||
return visitFn("/", "local.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000, RemoteSize: 99,
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "skipstore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "skipstore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
// Pre-populate a local-only entry (no Remote field)
|
||||
store.entries["/buckets/mybucket/local.txt"] = &Entry{
|
||||
FullPath: "/buckets/mybucket/local.txt",
|
||||
Attr: Attr{Mode: 0644, FileSize: 50},
|
||||
}
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
|
||||
// Local entry should NOT have been overwritten
|
||||
localEntry := store.getEntry("/buckets/mybucket/local.txt")
|
||||
require.NotNil(t, localEntry)
|
||||
assert.Equal(t, uint64(50), localEntry.FileSize, "local-only entry should not be overwritten")
|
||||
assert.Nil(t, localEntry.Remote, "local-only entry should keep nil Remote")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_MergesExistingRemoteEntry(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_merge"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
return visitFn("/", "cached.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000099, // updated mtime
|
||||
RemoteSize: 200, // updated size
|
||||
RemoteETag: "new-etag",
|
||||
StorageName: "mergestore",
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "mergestore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "mergestore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
// Pre-populate an existing remote-backed entry with chunks and extended attrs
|
||||
existingChunks := []*filer_pb.FileChunk{
|
||||
{FileId: "1,abc123", Size: 100, Offset: 0},
|
||||
}
|
||||
store.entries["/buckets/mybucket/cached.txt"] = &Entry{
|
||||
FullPath: "/buckets/mybucket/cached.txt",
|
||||
Attr: Attr{
|
||||
Mode: 0644,
|
||||
FileSize: 100,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
Mtime: time.Unix(1700000000, 0),
|
||||
Crtime: time.Unix(1699000000, 0),
|
||||
},
|
||||
Chunks: existingChunks,
|
||||
Extended: map[string][]byte{
|
||||
"user.custom": []byte("myvalue"),
|
||||
},
|
||||
Remote: &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000,
|
||||
RemoteSize: 100,
|
||||
RemoteETag: "old-etag",
|
||||
StorageName: "mergestore",
|
||||
},
|
||||
}
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls)
|
||||
|
||||
merged := store.getEntry("/buckets/mybucket/cached.txt")
|
||||
require.NotNil(t, merged)
|
||||
|
||||
// Remote metadata should be updated
|
||||
assert.Equal(t, int64(1700000099), merged.Remote.RemoteMtime)
|
||||
assert.Equal(t, int64(200), merged.Remote.RemoteSize)
|
||||
assert.Equal(t, "new-etag", merged.Remote.RemoteETag)
|
||||
assert.Equal(t, uint64(200), merged.FileSize)
|
||||
assert.Equal(t, time.Unix(1700000099, 0), merged.Mtime)
|
||||
|
||||
// Local state should be preserved
|
||||
assert.Equal(t, existingChunks, merged.Chunks, "chunks must be preserved")
|
||||
assert.Equal(t, []byte("myvalue"), merged.Extended["user.custom"], "extended attrs must be preserved")
|
||||
assert.Equal(t, uint32(1000), merged.Uid, "uid must be preserved")
|
||||
assert.Equal(t, uint32(1000), merged.Gid, "gid must be preserved")
|
||||
assert.Equal(t, os.FileMode(0644), merged.Mode, "mode must be preserved")
|
||||
assert.Equal(t, time.Unix(1699000000, 0), merged.Crtime, "crtime must be preserved")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_ContextGuardPreventsRecursion(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_guard"
|
||||
stub := &stubRemoteClient{}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "guardliststore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "guardliststore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
// With lazyListContextKey set, should be a no-op
|
||||
guardCtx := context.WithValue(context.Background(), lazyListContextKey{}, true)
|
||||
f.maybeLazyListFromRemote(guardCtx, util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 0, stub.listDirCalls)
|
||||
|
||||
// With lazyFetchContextKey set, should also be a no-op
|
||||
fetchCtx := context.WithValue(context.Background(), lazyFetchContextKey{}, true)
|
||||
f.maybeLazyListFromRemote(fetchCtx, util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 0, stub.listDirCalls)
|
||||
}
|
||||
|
||||
@@ -73,4 +73,5 @@ message RemoteStorageLocation {
|
||||
string name = 1;
|
||||
string bucket = 2;
|
||||
string path = 3;
|
||||
int32 listing_cache_ttl_seconds = 4; // 0 = disabled; >0 enables on-demand directory listing with this TTL in seconds
|
||||
}
|
||||
|
||||
@@ -457,12 +457,13 @@ func (x *RemoteStorageMapping) GetPrimaryBucketStorageName() string {
|
||||
}
|
||||
|
||||
type RemoteStorageLocation struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
|
||||
Bucket string `protobuf:"bytes,2,opt,name=bucket,proto3" json:"bucket,omitempty"`
|
||||
Path string `protobuf:"bytes,3,opt,name=path,proto3" json:"path,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
|
||||
Bucket string `protobuf:"bytes,2,opt,name=bucket,proto3" json:"bucket,omitempty"`
|
||||
Path string `protobuf:"bytes,3,opt,name=path,proto3" json:"path,omitempty"`
|
||||
ListingCacheTtlSeconds int32 `protobuf:"varint,4,opt,name=listing_cache_ttl_seconds,json=listingCacheTtlSeconds,proto3" json:"listing_cache_ttl_seconds,omitempty"` // 0 = disabled; >0 enables on-demand directory listing with this TTL in seconds
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *RemoteStorageLocation) Reset() {
|
||||
@@ -516,6 +517,13 @@ func (x *RemoteStorageLocation) GetPath() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *RemoteStorageLocation) GetListingCacheTtlSeconds() int32 {
|
||||
if x != nil {
|
||||
return x.ListingCacheTtlSeconds
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
var File_remote_proto protoreflect.FileDescriptor
|
||||
|
||||
const file_remote_proto_rawDesc = "" +
|
||||
@@ -573,11 +581,12 @@ const file_remote_proto_rawDesc = "" +
|
||||
"\x1bprimary_bucket_storage_name\x18\x02 \x01(\tR\x18primaryBucketStorageName\x1a]\n" +
|
||||
"\rMappingsEntry\x12\x10\n" +
|
||||
"\x03key\x18\x01 \x01(\tR\x03key\x126\n" +
|
||||
"\x05value\x18\x02 \x01(\v2 .remote_pb.RemoteStorageLocationR\x05value:\x028\x01\"W\n" +
|
||||
"\x05value\x18\x02 \x01(\v2 .remote_pb.RemoteStorageLocationR\x05value:\x028\x01\"\x92\x01\n" +
|
||||
"\x15RemoteStorageLocation\x12\x12\n" +
|
||||
"\x04name\x18\x01 \x01(\tR\x04name\x12\x16\n" +
|
||||
"\x06bucket\x18\x02 \x01(\tR\x06bucket\x12\x12\n" +
|
||||
"\x04path\x18\x03 \x01(\tR\x04pathBP\n" +
|
||||
"\x04path\x18\x03 \x01(\tR\x04path\x129\n" +
|
||||
"\x19listing_cache_ttl_seconds\x18\x04 \x01(\x05R\x16listingCacheTtlSecondsBP\n" +
|
||||
"\x10seaweedfs.clientB\n" +
|
||||
"FilerProtoZ0github.com/seaweedfs/seaweedfs/weed/pb/remote_pbb\x06proto3"
|
||||
|
||||
|
||||
@@ -127,6 +127,68 @@ type azureRemoteStorageClient struct {
|
||||
|
||||
var _ = remote_storage.RemoteStorageClient(&azureRemoteStorageClient{})
|
||||
|
||||
func (az *azureRemoteStorageClient) ListDirectory(ctx context.Context, loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) (err error) {
|
||||
pathKey := loc.Path[1:]
|
||||
if pathKey != "" && !strings.HasSuffix(pathKey, "/") {
|
||||
pathKey += "/"
|
||||
}
|
||||
|
||||
containerClient := az.client.ServiceClient().NewContainerClient(loc.Bucket)
|
||||
pager := containerClient.NewListBlobsHierarchyPager("/", &container.ListBlobsHierarchyOptions{
|
||||
Prefix: &pathKey,
|
||||
})
|
||||
|
||||
for pager.More() {
|
||||
resp, pageErr := pager.NextPage(ctx)
|
||||
if pageErr != nil {
|
||||
return fmt.Errorf("azure list directory %s%s: %w", loc.Bucket, loc.Path, pageErr)
|
||||
}
|
||||
|
||||
for _, prefix := range resp.Segment.BlobPrefixes {
|
||||
if prefix.Name == nil {
|
||||
continue
|
||||
}
|
||||
dirKey := "/" + strings.TrimSuffix(*prefix.Name, "/")
|
||||
dir, name := util.FullPath(dirKey).DirAndName()
|
||||
if err = visitFn(dir, name, true, nil); err != nil {
|
||||
return fmt.Errorf("azure processing directory prefix %s: %w", *prefix.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, blobItem := range resp.Segment.BlobItems {
|
||||
if blobItem.Name == nil {
|
||||
continue
|
||||
}
|
||||
key := "/" + *blobItem.Name
|
||||
if strings.HasSuffix(key, "/") {
|
||||
continue // skip directory markers
|
||||
}
|
||||
dir, name := util.FullPath(key).DirAndName()
|
||||
|
||||
remoteEntry := &filer_pb.RemoteEntry{
|
||||
StorageName: az.conf.Name,
|
||||
}
|
||||
if blobItem.Properties != nil {
|
||||
if blobItem.Properties.LastModified != nil {
|
||||
remoteEntry.RemoteMtime = blobItem.Properties.LastModified.Unix()
|
||||
}
|
||||
if blobItem.Properties.ContentLength != nil {
|
||||
remoteEntry.RemoteSize = *blobItem.Properties.ContentLength
|
||||
}
|
||||
if blobItem.Properties.ETag != nil {
|
||||
remoteEntry.RemoteETag = string(*blobItem.Properties.ETag)
|
||||
}
|
||||
}
|
||||
|
||||
if err = visitFn(dir, name, false, remoteEntry); err != nil {
|
||||
return fmt.Errorf("azure processing blob %s: %w", *blobItem.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (az *azureRemoteStorageClient) StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
|
||||
key := loc.Path[1:]
|
||||
ctx, cancel := context.WithTimeout(context.Background(), DefaultAzureOpTimeout)
|
||||
|
||||
@@ -131,6 +131,52 @@ func (gcs *gcsRemoteStorageClient) Traverse(loc *remote_pb.RemoteStorageLocation
|
||||
|
||||
const defaultGCSOpTimeout = 30 * time.Second
|
||||
|
||||
func (gcs *gcsRemoteStorageClient) ListDirectory(ctx context.Context, loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) (err error) {
|
||||
pathKey := loc.Path[1:]
|
||||
if pathKey != "" && !strings.HasSuffix(pathKey, "/") {
|
||||
pathKey += "/"
|
||||
}
|
||||
|
||||
objectIterator := gcs.client.Bucket(loc.Bucket).Objects(ctx, &storage.Query{
|
||||
Delimiter: "/",
|
||||
Prefix: pathKey,
|
||||
Versions: false,
|
||||
})
|
||||
|
||||
for {
|
||||
objectAttr, iterErr := objectIterator.Next()
|
||||
if iterErr != nil {
|
||||
if iterErr == iterator.Done {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("list directory %s%s: %w", loc.Bucket, loc.Path, iterErr)
|
||||
}
|
||||
|
||||
if objectAttr.Prefix != "" {
|
||||
// Common prefix → subdirectory
|
||||
dirKey := "/" + strings.TrimSuffix(objectAttr.Prefix, "/")
|
||||
dir, name := util.FullPath(dirKey).DirAndName()
|
||||
if err = visitFn(dir, name, true, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
key := "/" + objectAttr.Name
|
||||
if strings.HasSuffix(key, "/") {
|
||||
continue // skip directory markers
|
||||
}
|
||||
dir, name := util.FullPath(key).DirAndName()
|
||||
if err = visitFn(dir, name, false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: objectAttr.Updated.Unix(),
|
||||
RemoteSize: objectAttr.Size,
|
||||
RemoteETag: objectAttr.Etag,
|
||||
StorageName: gcs.conf.Name,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (gcs *gcsRemoteStorageClient) StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
|
||||
key := loc.Path[1:]
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultGCSOpTimeout)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package remote_storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -75,6 +76,7 @@ var ErrRemoteObjectNotFound = errors.New("remote object not found")
|
||||
|
||||
type RemoteStorageClient interface {
|
||||
Traverse(loc *remote_pb.RemoteStorageLocation, visitFn VisitFunc) error
|
||||
ListDirectory(ctx context.Context, loc *remote_pb.RemoteStorageLocation, visitFn VisitFunc) error
|
||||
StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error)
|
||||
ReadFile(loc *remote_pb.RemoteStorageLocation, offset int64, size int64) (data []byte, err error)
|
||||
WriteDirectory(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry) (err error)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
package s3
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/aws/aws-sdk-go/aws"
|
||||
"github.com/aws/aws-sdk-go/aws/awserr"
|
||||
@@ -98,12 +100,19 @@ func (s *s3RemoteStorageClient) Traverse(remote *remote_pb.RemoteStorageLocation
|
||||
key := *content.Key
|
||||
key = "/" + key
|
||||
dir, name := util.FullPath(key).DirAndName()
|
||||
if err := visitFn(dir, name, false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: (*content.LastModified).Unix(),
|
||||
RemoteSize: *content.Size,
|
||||
RemoteETag: *content.ETag,
|
||||
remoteEntry := &filer_pb.RemoteEntry{
|
||||
StorageName: s.conf.Name,
|
||||
}); err != nil {
|
||||
}
|
||||
if content.LastModified != nil {
|
||||
remoteEntry.RemoteMtime = content.LastModified.Unix()
|
||||
}
|
||||
if content.Size != nil {
|
||||
remoteEntry.RemoteSize = *content.Size
|
||||
}
|
||||
if content.ETag != nil {
|
||||
remoteEntry.RemoteETag = *content.ETag
|
||||
}
|
||||
if err := visitFn(dir, name, false, remoteEntry); err != nil {
|
||||
localErr = err
|
||||
return false
|
||||
}
|
||||
@@ -122,6 +131,65 @@ func (s *s3RemoteStorageClient) Traverse(remote *remote_pb.RemoteStorageLocation
|
||||
return
|
||||
}
|
||||
|
||||
func (s *s3RemoteStorageClient) ListDirectory(ctx context.Context, loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
pathKey := loc.Path[1:]
|
||||
if pathKey != "" && !strings.HasSuffix(pathKey, "/") {
|
||||
pathKey += "/"
|
||||
}
|
||||
|
||||
listInput := &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(loc.Bucket),
|
||||
Prefix: aws.String(pathKey),
|
||||
Delimiter: aws.String("/"),
|
||||
}
|
||||
|
||||
var localErr error
|
||||
listErr := s.conn.ListObjectsV2PagesWithContext(ctx, listInput, func(page *s3.ListObjectsV2Output, lastPage bool) bool {
|
||||
for _, prefix := range page.CommonPrefixes {
|
||||
if prefix.Prefix == nil {
|
||||
continue
|
||||
}
|
||||
dirKey := "/" + strings.TrimSuffix(*prefix.Prefix, "/")
|
||||
dir, name := util.FullPath(dirKey).DirAndName()
|
||||
if err := visitFn(dir, name, true, nil); err != nil {
|
||||
localErr = err
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, content := range page.Contents {
|
||||
key := "/" + *content.Key
|
||||
if strings.HasSuffix(key, "/") {
|
||||
continue // skip directory markers
|
||||
}
|
||||
dir, name := util.FullPath(key).DirAndName()
|
||||
remoteEntry := &filer_pb.RemoteEntry{
|
||||
StorageName: s.conf.Name,
|
||||
}
|
||||
if content.LastModified != nil {
|
||||
remoteEntry.RemoteMtime = content.LastModified.Unix()
|
||||
}
|
||||
if content.Size != nil {
|
||||
remoteEntry.RemoteSize = *content.Size
|
||||
}
|
||||
if content.ETag != nil {
|
||||
remoteEntry.RemoteETag = *content.ETag
|
||||
}
|
||||
if err := visitFn(dir, name, false, remoteEntry); err != nil {
|
||||
localErr = err
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
if listErr != nil {
|
||||
return fmt.Errorf("list directory %v: %w", loc, listErr)
|
||||
}
|
||||
if localErr != nil {
|
||||
return fmt.Errorf("process directory %v: %w", loc, localErr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *s3RemoteStorageClient) StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
|
||||
resp, err := s.conn.HeadObject(&s3.HeadObjectInput{
|
||||
Bucket: aws.String(loc.Bucket),
|
||||
|
||||
@@ -48,6 +48,8 @@ func (c *commandRemoteMount) Help() string {
|
||||
remote.mount -dir=/xxx -remote=cloud1/bucket -metadataStrategy=lazy
|
||||
# mount and pull one directory in the bucket
|
||||
remote.mount -dir=/xxx -remote=cloud1/bucket/dir1
|
||||
# mount with on-demand directory listing cached for 5 minutes
|
||||
remote.mount -dir=/xxx -remote=cloud1/bucket -listingCacheTTL=300
|
||||
|
||||
# after mount, start a separate process to write updates to remote storage
|
||||
weed filer.remote.sync -filer=<filerHost>:<filerPort> -dir=/xxx
|
||||
@@ -67,6 +69,7 @@ func (c *commandRemoteMount) Do(args []string, commandEnv *CommandEnv, writer io
|
||||
nonEmpty := remoteMountCommand.Bool("nonempty", false, "allows the mounting over a non-empty directory")
|
||||
metadataStrategy := remoteMountCommand.String("metadataStrategy", string(MetadataCacheEager), "lazy: skip upfront metadata pull; eager: full metadata pull (default)")
|
||||
remote := remoteMountCommand.String("remote", "", "a directory in remote storage, ex. <storageName>/<bucket>/path/to/dir")
|
||||
listingCacheTTL := remoteMountCommand.Int("listingCacheTTL", 0, "seconds to cache remote directory listings (0 = disabled)")
|
||||
|
||||
if err = remoteMountCommand.Parse(args); err != nil {
|
||||
return nil
|
||||
@@ -87,6 +90,7 @@ func (c *commandRemoteMount) Do(args []string, commandEnv *CommandEnv, writer io
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
remoteStorageLocation.ListingCacheTtlSeconds = int32(*listingCacheTTL)
|
||||
|
||||
strategy := MetadataCacheStrategy(strings.ToLower(*metadataStrategy))
|
||||
if strategy != MetadataCacheLazy && strategy != MetadataCacheEager {
|
||||
|
||||
Reference in New Issue
Block a user