Files
seaweedFS/weed/filer/remote_storage.go
Peter Dodd 16f2269a33 feat(filer): lazy metadata pulling (#8454)
* Add remote storage index for lazy metadata pull

Introduces remoteStorageIndex, which maintains a map of filer directory
to remote storage client/location, refreshed periodically from the
filer's mount mappings. Provides lazyFetchFromRemote, ensureRemoteEntryInFiler,
and isRemoteBacked on S3ApiServer as integration points for handler-level
work in a follow-up PR. Nothing is wired into the server yet.

Made-with: Cursor

* Add unit tests for remote storage index and wire field into S3ApiServer

Adds tests covering isEmpty, findForPath (including longest-prefix
resolution), and isRemoteBacked. Also removes a stray PR review
annotation from the index file and adds the remoteStorageIdx field
to S3ApiServer so the package compiles ahead of the wiring PR.

Made-with: Cursor

* Address review comments on remote storage index

- Use filer_pb.CreateEntry helper so resp.Error is checked, not just the RPC error
- Extract keepPrev closure to remove duplicated error-handling in refresh loop
- Add comment explaining availability-over-consistency trade-off on filer save failure

Made-with: Cursor

* Move lazy metadata pull from S3 API to filer

- Add maybeLazyFetchFromRemote in filer: on FindEntry miss, stat remote
  and CreateEntry when path is under a remote mount
- Use singleflight for dedup; context guard prevents CreateEntry recursion
- Availability-over-consistency: return in-memory entry if CreateEntry fails
- Add longest-prefix test for nested mounts in remote_storage_test.go
- Remove remoteStorageIndex, lazyFetchFromRemote, ensureRemoteEntryInFiler,
  doLazyFetch from s3api; filer now owns metadata operations
- Add filer_lazy_remote_test.go with tests for hit, miss, not-found,
  CreateEntry failure, longest-prefix, and FindEntry integration

Made-with: Cursor

* Address review: fix context guard test, add FindMountDirectory comment, remove dead code

Made-with: Cursor

* Nitpicks: restore prev maker in registerStubMaker, instance-scope lazyFetchGroup, nil-check remoteEntry

Made-with: Cursor

* Fix remotePath when mountDir is root: ensure relPath has leading slash

Made-with: Cursor

* filer: decouple lazy-fetch persistence from caller context

Use context.Background() inside the singleflight closure for CreateEntry
so persistence is not cancelled when the winning request's context is
cancelled. Fixes CreateEntry failing for all waiters when the first
caller times out.

Made-with: Cursor

* filer: remove redundant Mode bitwise OR with zero

Made-with: Cursor

* filer: use bounded context for lazy-fetch persistence

Replace context.Background() with context.WithTimeout(30s) and defer
cancel() to prevent indefinite blocking and release resources.

Made-with: Cursor

* filer: use checked type assertion for singleflight result

Made-with: Cursor

* filer: rename persist context vars to avoid shadowing function parameter

Made-with: Cursor
2026-03-03 13:01:10 -08:00

186 lines
6.0 KiB
Go

package filer
import (
"context"
"fmt"
"math"
"strings"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
"github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/viant/ptrie"
)
const REMOTE_STORAGE_CONF_SUFFIX = ".conf"
const REMOTE_STORAGE_MOUNT_FILE = "mount.mapping"
type FilerRemoteStorage struct {
rules ptrie.Trie[*remote_pb.RemoteStorageLocation]
storageNameToConf map[string]*remote_pb.RemoteConf
}
func NewFilerRemoteStorage() (rs *FilerRemoteStorage) {
rs = &FilerRemoteStorage{
rules: ptrie.New[*remote_pb.RemoteStorageLocation](),
storageNameToConf: make(map[string]*remote_pb.RemoteConf),
}
return rs
}
func (rs *FilerRemoteStorage) LoadRemoteStorageConfigurationsAndMapping(filer *Filer) (err error) {
// execute this on filer
limit := int64(math.MaxInt32)
entries, _, err := filer.ListDirectoryEntries(context.Background(), DirectoryEtcRemote, "", false, limit, "", "", "")
if err != nil {
if err == filer_pb.ErrNotFound {
return nil
}
glog.Errorf("read remote storage %s: %v", DirectoryEtcRemote, err)
return
}
for _, entry := range entries {
if entry.Name() == REMOTE_STORAGE_MOUNT_FILE {
if err := rs.loadRemoteStorageMountMapping(entry.Content); err != nil {
return err
}
continue
}
if !strings.HasSuffix(entry.Name(), REMOTE_STORAGE_CONF_SUFFIX) {
return nil
}
conf := &remote_pb.RemoteConf{}
if err := proto.Unmarshal(entry.Content, conf); err != nil {
return fmt.Errorf("unmarshal %s/%s: %v", DirectoryEtcRemote, entry.Name(), err)
}
rs.storageNameToConf[conf.Name] = conf
}
return nil
}
func (rs *FilerRemoteStorage) loadRemoteStorageMountMapping(data []byte) (err error) {
mappings := &remote_pb.RemoteStorageMapping{}
if err := proto.Unmarshal(data, mappings); err != nil {
return fmt.Errorf("unmarshal %s/%s: %v", DirectoryEtcRemote, REMOTE_STORAGE_MOUNT_FILE, err)
}
for dir, storageLocation := range mappings.Mappings {
rs.mapDirectoryToRemoteStorage(util.FullPath(dir), storageLocation)
}
return nil
}
func (rs *FilerRemoteStorage) mapDirectoryToRemoteStorage(dir util.FullPath, loc *remote_pb.RemoteStorageLocation) {
rs.rules.Put([]byte(dir+"/"), loc)
}
// FindMountDirectory returns the mount directory and location for p. When multiple
// mounts match (e.g. /buckets/b and /buckets/b/prefix), ptrie MatchPrefix visits
// shorter prefixes first, so the last match is the longest prefix.
func (rs *FilerRemoteStorage) FindMountDirectory(p util.FullPath) (mountDir util.FullPath, remoteLocation *remote_pb.RemoteStorageLocation) {
rs.rules.MatchPrefix([]byte(p), func(key []byte, value *remote_pb.RemoteStorageLocation) bool {
mountDir = util.FullPath(string(key[:len(key)-1]))
remoteLocation = value
return true
})
return
}
func (rs *FilerRemoteStorage) FindRemoteStorageClient(p util.FullPath) (client remote_storage.RemoteStorageClient, remoteConf *remote_pb.RemoteConf, found bool) {
var storageLocation *remote_pb.RemoteStorageLocation
rs.rules.MatchPrefix([]byte(p), func(key []byte, value *remote_pb.RemoteStorageLocation) bool {
storageLocation = value
return true
})
if storageLocation == nil {
found = false
return
}
return rs.GetRemoteStorageClient(storageLocation.Name)
}
func (rs *FilerRemoteStorage) GetRemoteStorageClient(storageName string) (client remote_storage.RemoteStorageClient, remoteConf *remote_pb.RemoteConf, found bool) {
remoteConf, found = rs.storageNameToConf[storageName]
if !found {
return
}
var err error
if client, err = remote_storage.GetRemoteStorage(remoteConf); err == nil {
found = true
return
}
return
}
func UnmarshalRemoteStorageMappings(oldContent []byte) (mappings *remote_pb.RemoteStorageMapping, err error) {
mappings = &remote_pb.RemoteStorageMapping{
Mappings: make(map[string]*remote_pb.RemoteStorageLocation),
}
if len(oldContent) > 0 {
if err = proto.Unmarshal(oldContent, mappings); err != nil {
glog.Warningf("unmarshal existing mappings: %v", err)
}
}
return
}
func ReadRemoteStorageConf(grpcDialOption grpc.DialOption, filerAddress pb.ServerAddress, storageName string) (conf *remote_pb.RemoteConf, readErr error) {
var oldContent []byte
if readErr = pb.WithFilerClient(false, 0, filerAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
oldContent, readErr = ReadInsideFiler(client, DirectoryEtcRemote, storageName+REMOTE_STORAGE_CONF_SUFFIX)
return readErr
}); readErr != nil {
return nil, readErr
}
// unmarshal storage configuration
conf = &remote_pb.RemoteConf{}
if unMarshalErr := proto.Unmarshal(oldContent, conf); unMarshalErr != nil {
readErr = fmt.Errorf("unmarshal %s/%s: %v", DirectoryEtcRemote, storageName+REMOTE_STORAGE_CONF_SUFFIX, unMarshalErr)
return
}
return
}
func DetectMountInfo(grpcDialOption grpc.DialOption, filerAddress pb.ServerAddress, dir string) (*remote_pb.RemoteStorageMapping, string, *remote_pb.RemoteStorageLocation, *remote_pb.RemoteConf, error) {
mappings, listErr := ReadMountMappings(grpcDialOption, filerAddress)
if listErr != nil {
return nil, "", nil, nil, listErr
}
if dir == "" {
return mappings, "", nil, nil, fmt.Errorf("need to specify '-dir' option")
}
var localMountedDir string
var remoteStorageMountedLocation *remote_pb.RemoteStorageLocation
for k, loc := range mappings.Mappings {
if strings.HasPrefix(dir, k) {
localMountedDir, remoteStorageMountedLocation = k, loc
}
}
if localMountedDir == "" {
return mappings, localMountedDir, remoteStorageMountedLocation, nil, fmt.Errorf("%s is not mounted", dir)
}
// find remote storage configuration
remoteStorageConf, err := ReadRemoteStorageConf(grpcDialOption, filerAddress, remoteStorageMountedLocation.Name)
if err != nil {
return mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, err
}
return mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, nil
}