* feat(filer): add lazy directory listing for remote mounts Directory listings on remote mounts previously only queried the local filer store. With lazy mounts the listing was empty; with eager mounts it went stale over time. Add on-demand directory listing that fetches from remote and caches results with a 5-minute TTL: - Add `ListDirectory` to `RemoteStorageClient` interface (delimiter-based, single-level listing, separate from recursive `Traverse`) - Implement in S3, GCS, and Azure backends using each platform's hierarchical listing API - Add `maybeLazyListFromRemote` to filer: before each directory listing, check if the directory is under a remote mount with an expired cache, fetch from remote, persist entries to the local store, then let existing listing logic run on the populated store - Use singleflight to deduplicate concurrent requests for the same directory - Skip local-only entries (no RemoteEntry) to avoid overwriting unsynced uploads - Errors are logged and swallowed (availability over consistency) * refactor: extract xattr key to constant xattrRemoteListingSyncedAt * feat: make listing cache TTL configurable per mount via listing_cache_ttl_seconds Add listing_cache_ttl_seconds field to RemoteStorageLocation protobuf. When 0 (default), lazy directory listing is disabled for that mount. When >0, enables on-demand directory listing with the specified TTL. Expose as -listingCacheTTL flag on remote.mount command. * refactor: address review feedback for lazy directory listing - Add context.Context to ListDirectory interface and all implementations - Capture startTime before remote call for accurate TTL tracking - Simplify S3 ListDirectory using ListObjectsV2PagesWithContext - Make maybeLazyListFromRemote return void (errors always swallowed) - Remove redundant trailing-slash path manipulation in caller - Update tests to match new signatures * When an existing entry has Remote != nil, we should merge remote metadata into it rather than replacing it. 
* fix(gcs): wrap ListDirectory iterator error with context The raw iterator error was returned without bucket/path context, making it harder to debug. Wrap it consistently with the S3 pattern. * fix(s3): guard against nil pointer dereference in Traverse and ListDirectory Some S3-compatible backends may return nil for LastModified, Size, or ETag fields. Check for nil before dereferencing to prevent panics. * fix(filer): remove blanket 2-minute timeout from lazy listing context Individual SDK operations (S3, GCS, Azure) already have per-request timeouts and retry policies. The blanket timeout could cut off large directory listings mid-operation even though individual pages were succeeding. * fix(filer): preserve trace context in lazy listing with WithoutCancel Use context.WithoutCancel(ctx) instead of context.Background() so trace/span values from the incoming request are retained for distributed tracing, while still decoupling cancellation. * fix(filer): use Store.FindEntry for internal lookups, add Uid/Gid to files, fix updateDirectoryListingSyncedAt - Use f.Store.FindEntry instead of f.FindEntry for staleness check and child lookups to avoid unnecessary lazy-fetch overhead - Set OS_UID/OS_GID on new file entries for consistency with directories - In updateDirectoryListingSyncedAt, use Store.UpdateEntry for existing directories instead of CreateEntry to avoid deleteChunksIfNotNew and NotifyUpdateEvent side effects * fix(filer): distinguish not-found from store errors in lazy listing Previously, any error from Store.FindEntry was treated as "not found," which could cause entry recreation/overwrite on transient DB failures. Now check for filer_pb.ErrNotFound explicitly and skip entries or bail out on real store errors. * refactor(filer): use errors.Is for ErrNotFound comparisons
169 lines
5.0 KiB
Go
169 lines
5.0 KiB
Go
package remote_storage
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
|
|
"google.golang.org/protobuf/proto"
|
|
)
|
|
|
|
const slash = "/"

// ParseLocationName returns the remote storage name, i.e. the first
// slash-separated segment of remote (e.g. "s3" for "s3/bucket/path").
// A single trailing slash is trimmed first; an empty input yields "".
func ParseLocationName(remote string) (locationName string) {
	remote = strings.TrimSuffix(remote, slash)
	// SplitN with a non-empty separator always returns at least one
	// element, so indexing parts[0] is safe and the previous
	// `if len(parts) >= 1` guard was dead code.
	return strings.SplitN(remote, slash, 2)[0]
}
|
|
|
|
func parseBucketLocation(remote string) (loc *remote_pb.RemoteStorageLocation) {
|
|
loc = &remote_pb.RemoteStorageLocation{}
|
|
remote = strings.TrimSuffix(remote, slash)
|
|
parts := strings.SplitN(remote, slash, 3)
|
|
if len(parts) >= 1 {
|
|
loc.Name = parts[0]
|
|
}
|
|
if len(parts) >= 2 {
|
|
loc.Bucket = parts[1]
|
|
}
|
|
loc.Path = remote[len(loc.Name)+1+len(loc.Bucket):]
|
|
if loc.Path == "" {
|
|
loc.Path = slash
|
|
}
|
|
return
|
|
}
|
|
|
|
func parseNoBucketLocation(remote string) (loc *remote_pb.RemoteStorageLocation) {
|
|
loc = &remote_pb.RemoteStorageLocation{}
|
|
remote = strings.TrimSuffix(remote, slash)
|
|
parts := strings.SplitN(remote, slash, 2)
|
|
if len(parts) >= 1 {
|
|
loc.Name = parts[0]
|
|
}
|
|
loc.Path = remote[len(loc.Name):]
|
|
if loc.Path == "" {
|
|
loc.Path = slash
|
|
}
|
|
return
|
|
}
|
|
|
|
func FormatLocation(loc *remote_pb.RemoteStorageLocation) string {
|
|
if loc.Bucket == "" {
|
|
return fmt.Sprintf("%s%s", loc.Name, loc.Path)
|
|
}
|
|
return fmt.Sprintf("%s/%s%s", loc.Name, loc.Bucket, loc.Path)
|
|
}
|
|
|
|
// VisitFunc is the callback invoked for each entry encountered while
// listing or traversing a remote location (see Traverse and ListDirectory).
type VisitFunc func(dir string, name string, isDirectory bool, remoteEntry *filer_pb.RemoteEntry) error
|
|
|
|
// Bucket describes a bucket on the remote storage backend,
// as returned by RemoteStorageClient.ListBuckets.
type Bucket struct {
	Name      string
	CreatedAt time.Time
}
|
|
|
|
// ErrRemoteObjectNotFound is returned by StatFile when the object does
// not exist in the remote storage backend. Compare with errors.Is.
var ErrRemoteObjectNotFound = errors.New("remote object not found")
|
|
|
|
// RemoteStorageClient abstracts a remote object storage backend
// (implementations are registered via RemoteStorageClientMakers).
type RemoteStorageClient interface {
	// Traverse recursively visits every entry under loc, calling visitFn for each.
	Traverse(loc *remote_pb.RemoteStorageLocation, visitFn VisitFunc) error
	// ListDirectory visits only the immediate children of loc
	// (delimiter-based, single-level listing — distinct from the
	// recursive Traverse), calling visitFn for each.
	ListDirectory(ctx context.Context, loc *remote_pb.RemoteStorageLocation, visitFn VisitFunc) error
	// StatFile fetches the remote metadata for the object at loc.
	// It returns ErrRemoteObjectNotFound when the object does not exist.
	StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error)
	// ReadFile reads size bytes starting at offset from the object at loc.
	ReadFile(loc *remote_pb.RemoteStorageLocation, offset int64, size int64) (data []byte, err error)
	// WriteDirectory creates/records the directory entry at loc on the remote.
	WriteDirectory(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry) (err error)
	// RemoveDirectory removes the directory at loc on the remote.
	RemoveDirectory(loc *remote_pb.RemoteStorageLocation) (err error)
	// WriteFile uploads the content of reader to loc and returns the
	// resulting remote metadata.
	WriteFile(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry, reader io.Reader) (remoteEntry *filer_pb.RemoteEntry, err error)
	// UpdateFileMetadata applies metadata changes between oldEntry and
	// newEntry to the object at loc.
	UpdateFileMetadata(loc *remote_pb.RemoteStorageLocation, oldEntry *filer_pb.Entry, newEntry *filer_pb.Entry) (err error)
	// DeleteFile deletes the object at loc.
	DeleteFile(loc *remote_pb.RemoteStorageLocation) (err error)
	// ListBuckets enumerates the buckets on the backend.
	ListBuckets() ([]*Bucket, error)
	// CreateBucket creates the named bucket.
	CreateBucket(name string) (err error)
	// DeleteBucket deletes the named bucket.
	DeleteBucket(name string) (err error)
}
|
|
|
|
// RemoteStorageClientMaker is a factory for one remote storage type.
type RemoteStorageClientMaker interface {
	// Make builds a client from the given configuration.
	Make(remoteConf *remote_pb.RemoteConf) (RemoteStorageClient, error)
	// HasBucket reports whether this storage type has a bucket level in
	// its locations; it selects the parser used by ParseRemoteLocation.
	HasBucket() bool
}
|
|
|
|
// CachedRemoteStorageClient pairs a client with the configuration it was
// built from, so GetRemoteStorage can detect (via proto.Equal) when the
// configuration changed and the client must be rebuilt.
type CachedRemoteStorageClient struct {
	*remote_pb.RemoteConf
	RemoteStorageClient
}
|
|
|
|
var (
	// RemoteStorageClientMakers maps a storage type (remote_pb.RemoteConf.Type)
	// to its factory; backends register themselves here.
	RemoteStorageClientMakers = make(map[string]RemoteStorageClientMaker)
	// remoteStorageClients caches built clients keyed by configuration name
	// (remote_pb.RemoteConf.Name); guarded by remoteStorageClientsLock.
	remoteStorageClients = make(map[string]CachedRemoteStorageClient)
	// remoteStorageClientsLock serializes all access to remoteStorageClients.
	remoteStorageClientsLock sync.Mutex
)
|
|
|
|
func GetAllRemoteStorageNames() string {
|
|
var storageNames []string
|
|
for k := range RemoteStorageClientMakers {
|
|
storageNames = append(storageNames, k)
|
|
}
|
|
sort.Strings(storageNames)
|
|
return strings.Join(storageNames, "|")
|
|
}
|
|
|
|
func GetRemoteStorageNamesHasBucket() string {
|
|
var storageNames []string
|
|
for k, m := range RemoteStorageClientMakers {
|
|
if m.HasBucket() {
|
|
storageNames = append(storageNames, k)
|
|
}
|
|
}
|
|
sort.Strings(storageNames)
|
|
return strings.Join(storageNames, "|")
|
|
}
|
|
|
|
func ParseRemoteLocation(remoteConfType string, remote string) (remoteStorageLocation *remote_pb.RemoteStorageLocation, err error) {
|
|
maker, found := RemoteStorageClientMakers[remoteConfType]
|
|
if !found {
|
|
return nil, fmt.Errorf("remote storage type %s not found", remoteConfType)
|
|
}
|
|
|
|
if !maker.HasBucket() {
|
|
return parseNoBucketLocation(remote), nil
|
|
}
|
|
return parseBucketLocation(remote), nil
|
|
}
|
|
|
|
func makeRemoteStorageClient(remoteConf *remote_pb.RemoteConf) (RemoteStorageClient, error) {
|
|
maker, found := RemoteStorageClientMakers[remoteConf.Type]
|
|
if !found {
|
|
return nil, fmt.Errorf("remote storage type %s not found", remoteConf.Type)
|
|
}
|
|
return maker.Make(remoteConf)
|
|
}
|
|
|
|
func GetRemoteStorage(remoteConf *remote_pb.RemoteConf) (RemoteStorageClient, error) {
|
|
remoteStorageClientsLock.Lock()
|
|
defer remoteStorageClientsLock.Unlock()
|
|
|
|
existingRemoteStorageClient, found := remoteStorageClients[remoteConf.Name]
|
|
if found && proto.Equal(existingRemoteStorageClient.RemoteConf, remoteConf) {
|
|
return existingRemoteStorageClient.RemoteStorageClient, nil
|
|
}
|
|
|
|
newRemoteStorageClient, err := makeRemoteStorageClient(remoteConf)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("make remote storage client %s: %v", remoteConf.Name, err)
|
|
}
|
|
|
|
remoteStorageClients[remoteConf.Name] = CachedRemoteStorageClient{
|
|
RemoteConf: remoteConf,
|
|
RemoteStorageClient: newRemoteStorageClient,
|
|
}
|
|
|
|
return newRemoteStorageClient, nil
|
|
}
|