Files
seaweedFS/weed/filer/filer_lazy_remote.go
Peter Dodd 16f2269a33 feat(filer): lazy metadata pulling (#8454)
* Add remote storage index for lazy metadata pull

Introduces remoteStorageIndex, which maintains a map of filer directory
to remote storage client/location, refreshed periodically from the
filer's mount mappings. Provides lazyFetchFromRemote, ensureRemoteEntryInFiler,
and isRemoteBacked on S3ApiServer as integration points for handler-level
work in a follow-up PR. Nothing is wired into the server yet.

Made-with: Cursor

* Add unit tests for remote storage index and wire field into S3ApiServer

Adds tests covering isEmpty, findForPath (including longest-prefix
resolution), and isRemoteBacked. Also removes a stray PR review
annotation from the index file and adds the remoteStorageIdx field
to S3ApiServer so the package compiles ahead of the wiring PR.

Made-with: Cursor

* Address review comments on remote storage index

- Use filer_pb.CreateEntry helper so resp.Error is checked, not just the RPC error
- Extract keepPrev closure to remove duplicated error-handling in refresh loop
- Add comment explaining availability-over-consistency trade-off on filer save failure

Made-with: Cursor

* Move lazy metadata pull from S3 API to filer

- Add maybeLazyFetchFromRemote in filer: on FindEntry miss, stat remote
  and CreateEntry when path is under a remote mount
- Use singleflight for dedup; context guard prevents CreateEntry recursion
- Availability-over-consistency: return in-memory entry if CreateEntry fails
- Add longest-prefix test for nested mounts in remote_storage_test.go
- Remove remoteStorageIndex, lazyFetchFromRemote, ensureRemoteEntryInFiler,
  doLazyFetch from s3api; filer now owns metadata operations
- Add filer_lazy_remote_test.go with tests for hit, miss, not-found,
  CreateEntry failure, longest-prefix, and FindEntry integration

Made-with: Cursor

* Address review: fix context guard test, add FindMountDirectory comment, remove dead code

Made-with: Cursor

* Nitpicks: restore prev maker in registerStubMaker, instance-scope lazyFetchGroup, nil-check remoteEntry

Made-with: Cursor

* Fix remotePath when mountDir is root: ensure relPath has leading slash

Made-with: Cursor

* filer: decouple lazy-fetch persistence from caller context

Use context.Background() inside the singleflight closure for CreateEntry
so persistence is not cancelled when the winning request's context is
cancelled. Fixes CreateEntry failing for all waiters when the first
caller times out.

Made-with: Cursor

* filer: remove redundant Mode bitwise OR with zero

Made-with: Cursor

* filer: use bounded context for lazy-fetch persistence

Replace context.Background() with context.WithTimeout(30s) and defer
cancel() to prevent indefinite blocking and release resources.

Made-with: Cursor

* filer: use checked type assertion for singleflight result

Made-with: Cursor

* filer: rename persist context vars to avoid shadowing function parameter

Made-with: Cursor
2026-03-03 13:01:10 -08:00

117 lines
3.5 KiB
Go

package filer
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// lazyFetchContextKey is the context key that maybeLazyFetchFromRemote sets
// on the context it hands to CreateEntry and checks on entry, so that a
// lookup made while persisting a lazily fetched entry does not start another
// lazy fetch.
type lazyFetchContextKey struct{}
// maybeLazyFetchFromRemote tries to materialize a filer entry for p from
// remote storage. FindEntry invokes it after the local store had no entry
// for p.
//
// When p sits under a remote-storage mount, the matching remote object is
// stat'ed, an Entry is built from the stat result, and that Entry is saved
// through CreateEntry with SkipCheckParentDirectory enabled, so intermediate
// directories below the mount do not have to exist in the filer.
//
// Concurrent lookups of the same path share one fetch through
// f.lazyFetchGroup. Persistence is best effort (availability over
// consistency): if CreateEntry fails after a successful StatFile, the failure
// is logged, the key is forgotten in the group, and the in-memory entry is
// returned anyway.
//
// The result is (nil, nil) when the call is a nested one made while
// persisting, when no remote storage is configured, when p is outside every
// remote mount or has no usable client, when the remote reports the object as
// missing, and when StatFile fails for any other reason. A non-nil error is
// only produced if the shared call itself returns an error or yields a value
// of an unexpected type.
func (f *Filer) maybeLazyFetchFromRemote(ctx context.Context, p util.FullPath) (*Entry, error) {
	// Re-entrancy guard comes first: CreateEntry calls FindEntry, and coming
	// back in here from that path would deadlock on the singleflight key.
	if ctx.Value(lazyFetchContextKey{}) != nil || f.RemoteStorage == nil {
		return nil, nil
	}

	mountRoot, mountLoc := f.RemoteStorage.FindMountDirectory(p)
	if mountLoc == nil {
		return nil, nil
	}
	remoteClient, _, hasClient := f.RemoteStorage.FindRemoteStorageClient(p)
	if !hasClient {
		return nil, nil
	}

	// Map the filer path onto the remote object path: strip the mount
	// directory, then append what is left to the mount's remote base path.
	// When the mount directory is "/", stripping it also removes the
	// separator, so it is put back before joining.
	suffix := strings.TrimPrefix(string(p), string(mountRoot))
	if len(suffix) > 0 && suffix[0] != '/' {
		suffix = "/" + suffix
	}
	joined := strings.TrimSuffix(mountLoc.Path, "/") + suffix
	target := &remote_pb.RemoteStorageLocation{
		Name:   mountLoc.Name,
		Bucket: mountLoc.Bucket,
		Path:   "/" + strings.TrimLeft(joined, "/"),
	}

	// lazyFetchResult wraps the entry so the value coming back from the
	// shared call can be type-checked before use.
	type lazyFetchResult struct {
		entry *Entry
	}

	flightKey := string(p)

	// fetch runs once per group of concurrent lookups for flightKey. It never
	// returns an error; "nothing usable on the remote" is a nil entry.
	fetch := func() (interface{}, error) {
		stat, statErr := remoteClient.StatFile(target)
		switch {
		case statErr != nil && errors.Is(statErr, remote_storage.ErrRemoteObjectNotFound):
			glog.V(3).InfofCtx(ctx, "maybeLazyFetchFromRemote: %s not found in remote", p)
			return lazyFetchResult{}, nil
		case statErr != nil:
			glog.Warningf("maybeLazyFetchFromRemote: stat %s failed: %v", p, statErr)
			return lazyFetchResult{}, nil
		case stat == nil:
			glog.V(3).InfofCtx(ctx, "maybeLazyFetchFromRemote: %s StatFile returned nil entry", p)
			return lazyFetchResult{}, nil
		}

		// The remote modification time (seconds) doubles as creation time.
		modified := time.Unix(stat.RemoteMtime, 0)
		fetched := &Entry{
			FullPath: p,
			Attr: Attr{
				Mtime:    modified,
				Crtime:   modified,
				Mode:     0644,
				FileSize: uint64(stat.RemoteSize),
			},
			Remote: stat,
		}

		// Persist on a context detached from the caller's, so a cancelled
		// winning request does not fail the write for every waiter, but cap
		// it at 30s so the write cannot block indefinitely. The context also
		// carries the guard key checked at the top of this function.
		timeoutCtx, stop := context.WithTimeout(context.Background(), 30*time.Second)
		defer stop()
		guardedCtx := context.WithValue(timeoutCtx, lazyFetchContextKey{}, true)

		if saveErr := f.CreateEntry(guardedCtx, fetched, false, false, nil, true, f.MaxFilenameLength); saveErr != nil {
			// Best effort: keep serving the in-memory entry, and drop the key
			// so lookups arriving from now on start a fresh attempt.
			glog.Warningf("maybeLazyFetchFromRemote: failed to persist filer entry for %s: %v", p, saveErr)
			f.lazyFetchGroup.Forget(flightKey)
		}
		return lazyFetchResult{entry: fetched}, nil
	}

	shared, err, _ := f.lazyFetchGroup.Do(flightKey, fetch)
	if err != nil {
		return nil, err
	}
	if outcome, ok := shared.(lazyFetchResult); ok {
		return outcome.entry, nil
	}
	return nil, fmt.Errorf("maybeLazyFetchFromRemote: unexpected singleflight result type %T for %s", shared, p)
}