* feat(filer): add lazy directory listing for remote mounts

  Directory listings on remote mounts previously only queried the local filer store. With lazy mounts the listing was empty; with eager mounts it went stale over time.

  Add on-demand directory listing that fetches from remote and caches results with a 5-minute TTL:

  - Add `ListDirectory` to `RemoteStorageClient` interface (delimiter-based, single-level listing, separate from recursive `Traverse`)
  - Implement in S3, GCS, and Azure backends using each platform's hierarchical listing API
  - Add `maybeLazyListFromRemote` to filer: before each directory listing, check if the directory is under a remote mount with an expired cache, fetch from remote, persist entries to the local store, then let existing listing logic run on the populated store
  - Use singleflight to deduplicate concurrent requests for the same directory
  - Skip local-only entries (no RemoteEntry) to avoid overwriting unsynced uploads
  - Errors are logged and swallowed (availability over consistency)

* refactor: extract xattr key to constant xattrRemoteListingSyncedAt

* feat: make listing cache TTL configurable per mount via listing_cache_ttl_seconds

  Add listing_cache_ttl_seconds field to RemoteStorageLocation protobuf. When 0 (default), lazy directory listing is disabled for that mount. When >0, enables on-demand directory listing with the specified TTL.

  Expose as -listingCacheTTL flag on remote.mount command.

* refactor: address review feedback for lazy directory listing

  - Add context.Context to ListDirectory interface and all implementations
  - Capture startTime before remote call for accurate TTL tracking
  - Simplify S3 ListDirectory using ListObjectsV2PagesWithContext
  - Make maybeLazyListFromRemote return void (errors always swallowed)
  - Remove redundant trailing-slash path manipulation in caller
  - Update tests to match new signatures

* When an existing entry has Remote != nil, we should merge remote metadata into it rather than replacing it.
* fix(gcs): wrap ListDirectory iterator error with context

  The raw iterator error was returned without bucket/path context, making it harder to debug. Wrap it consistently with the S3 pattern.

* fix(s3): guard against nil pointer dereference in Traverse and ListDirectory

  Some S3-compatible backends may return nil for LastModified, Size, or ETag fields. Check for nil before dereferencing to prevent panics.

* fix(filer): remove blanket 2-minute timeout from lazy listing context

  Individual SDK operations (S3, GCS, Azure) already have per-request timeouts and retry policies. The blanket timeout could cut off large directory listings mid-operation even though individual pages were succeeding.

* fix(filer): preserve trace context in lazy listing with WithoutCancel

  Use context.WithoutCancel(ctx) instead of context.Background() so trace/span values from the incoming request are retained for distributed tracing, while still decoupling cancellation.

* fix(filer): use Store.FindEntry for internal lookups, add Uid/Gid to files, fix updateDirectoryListingSyncedAt

  - Use f.Store.FindEntry instead of f.FindEntry for staleness check and child lookups to avoid unnecessary lazy-fetch overhead
  - Set OS_UID/OS_GID on new file entries for consistency with directories
  - In updateDirectoryListingSyncedAt, use Store.UpdateEntry for existing directories instead of CreateEntry to avoid deleteChunksIfNotNew and NotifyUpdateEvent side effects

* fix(filer): distinguish not-found from store errors in lazy listing

  Previously, any error from Store.FindEntry was treated as "not found," which could cause entry recreation/overwrite on transient DB failures. Now check for filer_pb.ErrNotFound explicitly and skip entries or bail out on real store errors.

* refactor(filer): use errors.Is for ErrNotFound comparisons
217 lines
6.9 KiB
Go
217 lines
6.9 KiB
Go
package shell
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/filer"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
"google.golang.org/protobuf/proto"
|
|
)
|
|
|
|
// MetadataCacheStrategy names how remote.mount populates filer metadata for a
// remote location at mount time.
type MetadataCacheStrategy string

const (
	// MetadataCacheEager requests a full metadata pull when mounting.
	MetadataCacheEager = MetadataCacheStrategy("eager")
	// MetadataCacheLazy skips the upfront metadata pull when mounting.
	MetadataCacheLazy = MetadataCacheStrategy("lazy")
)
|
|
|
|
func init() {
|
|
Commands = append(Commands, &commandRemoteMount{})
|
|
}
|
|
|
|
// commandRemoteMount implements the "remote.mount" shell command. It carries no
// state of its own.
type commandRemoteMount struct{}
|
|
|
|
func (c *commandRemoteMount) Name() string {
|
|
return "remote.mount"
|
|
}
|
|
|
|
func (c *commandRemoteMount) Help() string {
|
|
return `mount remote storage and optionally pull its metadata
|
|
|
|
# assume a remote storage is configured to name "cloud1"
|
|
remote.configure -name=cloud1 -type=s3 -s3.access_key=xxx -s3.secret_key=yyy
|
|
|
|
# mount and pull one bucket (full upfront metadata sync)
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket
|
|
# mount without upfront sync; metadata is fetched lazily on access
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket -metadataStrategy=lazy
|
|
# mount and pull one directory in the bucket
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket/dir1
|
|
# mount with on-demand directory listing cached for 5 minutes
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket -listingCacheTTL=300
|
|
|
|
# after mount, start a separate process to write updates to remote storage
|
|
weed filer.remote.sync -filer=<filerHost>:<filerPort> -dir=/xxx
|
|
|
|
`
|
|
}
|
|
|
|
func (c *commandRemoteMount) HasTag(CommandTag) bool {
|
|
return false
|
|
}
|
|
|
|
func (c *commandRemoteMount) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
|
|
remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
|
|
|
dir := remoteMountCommand.String("dir", "", "a directory in filer")
|
|
nonEmpty := remoteMountCommand.Bool("nonempty", false, "allows the mounting over a non-empty directory")
|
|
metadataStrategy := remoteMountCommand.String("metadataStrategy", string(MetadataCacheEager), "lazy: skip upfront metadata pull; eager: full metadata pull (default)")
|
|
remote := remoteMountCommand.String("remote", "", "a directory in remote storage, ex. <storageName>/<bucket>/path/to/dir")
|
|
listingCacheTTL := remoteMountCommand.Int("listingCacheTTL", 0, "seconds to cache remote directory listings (0 = disabled)")
|
|
|
|
if err = remoteMountCommand.Parse(args); err != nil {
|
|
return nil
|
|
}
|
|
|
|
if *dir == "" {
|
|
_, err = listExistingRemoteStorageMounts(commandEnv, writer)
|
|
return err
|
|
}
|
|
|
|
// find configuration for remote storage
|
|
remoteConf, err := filer.ReadRemoteStorageConf(commandEnv.option.GrpcDialOption, commandEnv.option.FilerAddress, remote_storage.ParseLocationName(*remote))
|
|
if err != nil {
|
|
return fmt.Errorf("find configuration for %s: %v", *remote, err)
|
|
}
|
|
|
|
remoteStorageLocation, err := remote_storage.ParseRemoteLocation(remoteConf.Type, *remote)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
remoteStorageLocation.ListingCacheTtlSeconds = int32(*listingCacheTTL)
|
|
|
|
strategy := MetadataCacheStrategy(strings.ToLower(*metadataStrategy))
|
|
if strategy != MetadataCacheLazy && strategy != MetadataCacheEager {
|
|
return fmt.Errorf("metadataStrategy must be %s or %s, got %q", MetadataCacheLazy, MetadataCacheEager, *metadataStrategy)
|
|
}
|
|
|
|
if err = ensureMountDirectory(commandEnv, *dir, *nonEmpty, remoteConf); err != nil {
|
|
return fmt.Errorf("mount setup: %w", err)
|
|
}
|
|
|
|
if strategy == MetadataCacheEager {
|
|
if err = pullMetadata(commandEnv, writer, util.FullPath(*dir), remoteStorageLocation, util.FullPath(*dir), remoteConf); err != nil {
|
|
return fmt.Errorf("cache metadata: %w", err)
|
|
}
|
|
}
|
|
|
|
// store a mount configuration in filer
|
|
if err = filer.InsertMountMapping(commandEnv, *dir, remoteStorageLocation); err != nil {
|
|
return fmt.Errorf("save mount mapping: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func listExistingRemoteStorageMounts(commandEnv *CommandEnv, writer io.Writer) (mappings *remote_pb.RemoteStorageMapping, err error) {
|
|
|
|
// read current mapping
|
|
mappings, err = filer.ReadMountMappings(commandEnv.option.GrpcDialOption, commandEnv.option.FilerAddress)
|
|
if err != nil {
|
|
return mappings, err
|
|
}
|
|
|
|
jsonPrintln(writer, mappings)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
func jsonPrintln(writer io.Writer, message proto.Message) error {
|
|
return filer.ProtoToText(writer, message)
|
|
}
|
|
|
|
func ensureMountDirectory(commandEnv *CommandEnv, dir string, nonEmpty bool, remoteConf *remote_pb.RemoteConf) error {
|
|
return commandEnv.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
parent, name := util.FullPath(dir).DirAndName()
|
|
_, lookupErr := filer_pb.LookupEntry(context.Background(), client, &filer_pb.LookupDirectoryEntryRequest{
|
|
Directory: parent,
|
|
Name: name,
|
|
})
|
|
if lookupErr != nil {
|
|
if errors.Is(lookupErr, filer_pb.ErrNotFound) {
|
|
_, createErr := client.CreateEntry(context.Background(), &filer_pb.CreateEntryRequest{
|
|
Directory: parent,
|
|
Entry: &filer_pb.Entry{
|
|
Name: name,
|
|
IsDirectory: true,
|
|
Attributes: &filer_pb.FuseAttributes{
|
|
Mtime: time.Now().Unix(),
|
|
Crtime: time.Now().Unix(),
|
|
FileMode: uint32(0755 | os.ModeDir),
|
|
},
|
|
RemoteEntry: &filer_pb.RemoteEntry{
|
|
StorageName: remoteConf.Name,
|
|
},
|
|
},
|
|
})
|
|
return createErr
|
|
}
|
|
return lookupErr
|
|
}
|
|
|
|
mountToDirIsEmpty := true
|
|
listErr := filer_pb.SeaweedList(context.Background(), client, dir, "", func(entry *filer_pb.Entry, isLast bool) error {
|
|
mountToDirIsEmpty = false
|
|
return nil
|
|
}, "", false, 1)
|
|
|
|
if listErr != nil {
|
|
return fmt.Errorf("list %s: %v", dir, listErr)
|
|
}
|
|
|
|
if !mountToDirIsEmpty {
|
|
if !nonEmpty {
|
|
return fmt.Errorf("dir %s is not empty", dir)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// if an entry has synchronized metadata but has not synchronized content
|
|
//
|
|
// entry.Attributes.FileSize == entry.RemoteEntry.RemoteSize
|
|
// entry.Attributes.Mtime == entry.RemoteEntry.RemoteMtime
|
|
// entry.RemoteEntry.LastLocalSyncTsNs == 0
|
|
//
|
|
// if an entry has synchronized metadata but has synchronized content before
|
|
//
|
|
// entry.Attributes.FileSize == entry.RemoteEntry.RemoteSize
|
|
// entry.Attributes.Mtime == entry.RemoteEntry.RemoteMtime
|
|
// entry.RemoteEntry.LastLocalSyncTsNs > 0
|
|
//
|
|
// if an entry has synchronized metadata but has new updates
|
|
//
|
|
// entry.Attributes.Mtime * 1,000,000,000 > entry.RemoteEntry.LastLocalSyncTsNs
|
|
func doSaveRemoteEntry(client filer_pb.SeaweedFilerClient, localDir string, existingEntry *filer_pb.Entry, remoteEntry *filer_pb.RemoteEntry) error {
|
|
existingEntry.RemoteEntry = remoteEntry
|
|
existingEntry.Attributes.FileSize = uint64(remoteEntry.RemoteSize)
|
|
existingEntry.Attributes.Mtime = remoteEntry.RemoteMtime
|
|
existingEntry.Attributes.Md5 = nil
|
|
existingEntry.Attributes.TtlSec = 0 // Remote entries should not have TTL
|
|
existingEntry.Chunks = nil
|
|
existingEntry.Content = nil
|
|
_, updateErr := client.UpdateEntry(context.Background(), &filer_pb.UpdateEntryRequest{
|
|
Directory: localDir,
|
|
Entry: existingEntry,
|
|
})
|
|
if updateErr != nil {
|
|
return updateErr
|
|
}
|
|
return nil
|
|
}
|