feat(filer): add lazy directory listing for remote mounts (#8615)
* feat(filer): add lazy directory listing for remote mounts Directory listings on remote mounts previously only queried the local filer store. With lazy mounts the listing was empty; with eager mounts it went stale over time. Add on-demand directory listing that fetches from remote and caches results with a 5-minute TTL: - Add `ListDirectory` to `RemoteStorageClient` interface (delimiter-based, single-level listing, separate from recursive `Traverse`) - Implement in S3, GCS, and Azure backends using each platform's hierarchical listing API - Add `maybeLazyListFromRemote` to filer: before each directory listing, check if the directory is under a remote mount with an expired cache, fetch from remote, persist entries to the local store, then let existing listing logic run on the populated store - Use singleflight to deduplicate concurrent requests for the same directory - Skip local-only entries (no RemoteEntry) to avoid overwriting unsynced uploads - Errors are logged and swallowed (availability over consistency) * refactor: extract xattr key to constant xattrRemoteListingSyncedAt * feat: make listing cache TTL configurable per mount via listing_cache_ttl_seconds Add listing_cache_ttl_seconds field to RemoteStorageLocation protobuf. When 0 (default), lazy directory listing is disabled for that mount. When >0, enables on-demand directory listing with the specified TTL. Expose as -listingCacheTTL flag on remote.mount command. * refactor: address review feedback for lazy directory listing - Add context.Context to ListDirectory interface and all implementations - Capture startTime before remote call for accurate TTL tracking - Simplify S3 ListDirectory using ListObjectsV2PagesWithContext - Make maybeLazyListFromRemote return void (errors always swallowed) - Remove redundant trailing-slash path manipulation in caller - Update tests to match new signatures * When an existing entry has Remote != nil, we should merge remote metadata into it rather than replacing it. * fix(gcs): wrap ListDirectory iterator error with context The raw iterator error was returned without bucket/path context, making it harder to debug. Wrap it consistently with the S3 pattern. * fix(s3): guard against nil pointer dereference in Traverse and ListDirectory Some S3-compatible backends may return nil for LastModified, Size, or ETag fields. Check for nil before dereferencing to prevent panics. * fix(filer): remove blanket 2-minute timeout from lazy listing context Individual SDK operations (S3, GCS, Azure) already have per-request timeouts and retry policies. The blanket timeout could cut off large directory listings mid-operation even though individual pages were succeeding. * fix(filer): preserve trace context in lazy listing with WithoutCancel Use context.WithoutCancel(ctx) instead of context.Background() so trace/span values from the incoming request are retained for distributed tracing, while still decoupling cancellation. * fix(filer): use Store.FindEntry for internal lookups, add Uid/Gid to files, fix updateDirectoryListingSyncedAt - Use f.Store.FindEntry instead of f.FindEntry for staleness check and child lookups to avoid unnecessary lazy-fetch overhead - Set OS_UID/OS_GID on new file entries for consistency with directories - In updateDirectoryListingSyncedAt, use Store.UpdateEntry for existing directories instead of CreateEntry to avoid deleteChunksIfNotNew and NotifyUpdateEvent side effects * fix(filer): distinguish not-found from store errors in lazy listing Previously, any error from Store.FindEntry was treated as "not found," which could cause entry recreation/overwrite on transient DB failures. Now check for filer_pb.ErrNotFound explicitly and skip entries or bail out on real store errors. * refactor(filer): use errors.Is for ErrNotFound comparisons
This commit is contained in:
@@ -199,6 +199,9 @@ type stubRemoteClient struct {
|
||||
|
||||
deleteCalls []*remote_pb.RemoteStorageLocation
|
||||
removeCalls []*remote_pb.RemoteStorageLocation
|
||||
|
||||
listDirFn func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error
|
||||
listDirCalls int
|
||||
}
|
||||
|
||||
func (c *stubRemoteClient) StatFile(*remote_pb.RemoteStorageLocation) (*filer_pb.RemoteEntry, error) {
|
||||
@@ -235,6 +238,13 @@ func (c *stubRemoteClient) DeleteFile(loc *remote_pb.RemoteStorageLocation) erro
|
||||
})
|
||||
return c.deleteErr
|
||||
}
|
||||
func (c *stubRemoteClient) ListDirectory(_ context.Context, loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
c.listDirCalls++
|
||||
if c.listDirFn != nil {
|
||||
return c.listDirFn(loc, visitFn)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (c *stubRemoteClient) ListBuckets() ([]*remote_storage.Bucket, error) { return nil, nil }
|
||||
func (c *stubRemoteClient) CreateBucket(string) error { return nil }
|
||||
func (c *stubRemoteClient) DeleteBucket(string) error { return nil }
|
||||
@@ -828,3 +838,264 @@ func TestDeleteEntryMetaAndData_RecursiveFolderDeleteRemotesChildren(t *testing.
|
||||
require.Len(t, stub.removeCalls, 1)
|
||||
assert.Equal(t, "/subdir", stub.removeCalls[0].Path)
|
||||
}
|
||||
|
||||
// --- lazy listing tests ---
|
||||
|
||||
func TestMaybeLazyListFromRemote_PopulatesStoreFromRemote(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_populate"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
if err := visitFn("/", "subdir", true, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := visitFn("/", "file.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000,
|
||||
RemoteSize: 42,
|
||||
RemoteETag: "abc",
|
||||
StorageName: "myliststore",
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "myliststore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "myliststore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls)
|
||||
|
||||
// Check that the file was persisted
|
||||
fileEntry := store.getEntry("/buckets/mybucket/file.txt")
|
||||
require.NotNil(t, fileEntry, "file.txt should be persisted")
|
||||
assert.Equal(t, uint64(42), fileEntry.FileSize)
|
||||
assert.NotNil(t, fileEntry.Remote)
|
||||
|
||||
// Check that the subdirectory was persisted
|
||||
dirEntry := store.getEntry("/buckets/mybucket/subdir")
|
||||
require.NotNil(t, dirEntry, "subdir should be persisted")
|
||||
assert.True(t, dirEntry.IsDirectory())
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_DisabledWhenTTLZero(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_disabled"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
return visitFn("/", "file.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000, RemoteSize: 10,
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "disabledstore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "disabledstore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
// ListingCacheTtlSeconds defaults to 0 → disabled
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 0, stub.listDirCalls, "should not call remote when TTL is 0")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_TTLCachePreventsSecondCall(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_ttl"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
return visitFn("/", "file.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000, RemoteSize: 10,
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "ttlstore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "ttlstore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
// First call should hit remote
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls)
|
||||
|
||||
// Second call within TTL should be a no-op
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls, "should not call remote again within TTL")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_NotUnderMount(t *testing.T) {
|
||||
rs := NewFilerRemoteStorage()
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/not/a/mount"))
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_SkipsLocalOnlyEntries(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_skiplocal"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
// Remote has a file called "local.txt" too
|
||||
return visitFn("/", "local.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000, RemoteSize: 99,
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "skipstore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "skipstore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
// Pre-populate a local-only entry (no Remote field)
|
||||
store.entries["/buckets/mybucket/local.txt"] = &Entry{
|
||||
FullPath: "/buckets/mybucket/local.txt",
|
||||
Attr: Attr{Mode: 0644, FileSize: 50},
|
||||
}
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
|
||||
// Local entry should NOT have been overwritten
|
||||
localEntry := store.getEntry("/buckets/mybucket/local.txt")
|
||||
require.NotNil(t, localEntry)
|
||||
assert.Equal(t, uint64(50), localEntry.FileSize, "local-only entry should not be overwritten")
|
||||
assert.Nil(t, localEntry.Remote, "local-only entry should keep nil Remote")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_MergesExistingRemoteEntry(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_merge"
|
||||
stub := &stubRemoteClient{
|
||||
listDirFn: func(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) error {
|
||||
return visitFn("/", "cached.txt", false, &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000099, // updated mtime
|
||||
RemoteSize: 200, // updated size
|
||||
RemoteETag: "new-etag",
|
||||
StorageName: "mergestore",
|
||||
})
|
||||
},
|
||||
}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "mergestore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "mergestore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
// Pre-populate an existing remote-backed entry with chunks and extended attrs
|
||||
existingChunks := []*filer_pb.FileChunk{
|
||||
{FileId: "1,abc123", Size: 100, Offset: 0},
|
||||
}
|
||||
store.entries["/buckets/mybucket/cached.txt"] = &Entry{
|
||||
FullPath: "/buckets/mybucket/cached.txt",
|
||||
Attr: Attr{
|
||||
Mode: 0644,
|
||||
FileSize: 100,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
Mtime: time.Unix(1700000000, 0),
|
||||
Crtime: time.Unix(1699000000, 0),
|
||||
},
|
||||
Chunks: existingChunks,
|
||||
Extended: map[string][]byte{
|
||||
"user.custom": []byte("myvalue"),
|
||||
},
|
||||
Remote: &filer_pb.RemoteEntry{
|
||||
RemoteMtime: 1700000000,
|
||||
RemoteSize: 100,
|
||||
RemoteETag: "old-etag",
|
||||
StorageName: "mergestore",
|
||||
},
|
||||
}
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
f.maybeLazyListFromRemote(context.Background(), util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 1, stub.listDirCalls)
|
||||
|
||||
merged := store.getEntry("/buckets/mybucket/cached.txt")
|
||||
require.NotNil(t, merged)
|
||||
|
||||
// Remote metadata should be updated
|
||||
assert.Equal(t, int64(1700000099), merged.Remote.RemoteMtime)
|
||||
assert.Equal(t, int64(200), merged.Remote.RemoteSize)
|
||||
assert.Equal(t, "new-etag", merged.Remote.RemoteETag)
|
||||
assert.Equal(t, uint64(200), merged.FileSize)
|
||||
assert.Equal(t, time.Unix(1700000099, 0), merged.Mtime)
|
||||
|
||||
// Local state should be preserved
|
||||
assert.Equal(t, existingChunks, merged.Chunks, "chunks must be preserved")
|
||||
assert.Equal(t, []byte("myvalue"), merged.Extended["user.custom"], "extended attrs must be preserved")
|
||||
assert.Equal(t, uint32(1000), merged.Uid, "uid must be preserved")
|
||||
assert.Equal(t, uint32(1000), merged.Gid, "gid must be preserved")
|
||||
assert.Equal(t, os.FileMode(0644), merged.Mode, "mode must be preserved")
|
||||
assert.Equal(t, time.Unix(1699000000, 0), merged.Crtime, "crtime must be preserved")
|
||||
}
|
||||
|
||||
func TestMaybeLazyListFromRemote_ContextGuardPreventsRecursion(t *testing.T) {
|
||||
const storageType = "stub_lazy_list_guard"
|
||||
stub := &stubRemoteClient{}
|
||||
defer registerStubMaker(t, storageType, stub)()
|
||||
|
||||
conf := &remote_pb.RemoteConf{Name: "guardliststore", Type: storageType}
|
||||
rs := NewFilerRemoteStorage()
|
||||
rs.storageNameToConf[conf.Name] = conf
|
||||
rs.mapDirectoryToRemoteStorage("/buckets/mybucket", &remote_pb.RemoteStorageLocation{
|
||||
Name: "guardliststore",
|
||||
Bucket: "mybucket",
|
||||
Path: "/",
|
||||
ListingCacheTtlSeconds: 300,
|
||||
})
|
||||
|
||||
store := newStubFilerStore()
|
||||
f := newTestFiler(t, store, rs)
|
||||
|
||||
// With lazyListContextKey set, should be a no-op
|
||||
guardCtx := context.WithValue(context.Background(), lazyListContextKey{}, true)
|
||||
f.maybeLazyListFromRemote(guardCtx, util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 0, stub.listDirCalls)
|
||||
|
||||
// With lazyFetchContextKey set, should also be a no-op
|
||||
fetchCtx := context.WithValue(context.Background(), lazyFetchContextKey{}, true)
|
||||
f.maybeLazyListFromRemote(fetchCtx, util.FullPath("/buckets/mybucket"))
|
||||
assert.Equal(t, 0, stub.listDirCalls)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user