do delete expired entries on s3 list request (#7426)
* do delete expired entries on s3 list request https://github.com/seaweedfs/seaweedfs/issues/6837 * disable deleting expired s3 entries in filer * pass opt allowDeleteObjectsByTTL to all servers * delete on get and head * add lifecycle expiration s3 tests * fix opt allowDeleteObjectsByTTL for server * fix test lifecycle expiration * fix IsExpired * fix locationPrefix for updateEntriesTTL * fix s3tests * resolve coderabbitai comments * GetS3ExpireTime on filer * go mod * clear TtlSeconds for volume * move s3 delete expired entry to filer * filer delete meta and data * delete unused func removeExpiredObject * test s3 put * test s3 put multipart * allowDeleteObjectsByTTL by default * fix pipeline tests * rm duplicate SeaweedFSExpiresS3 * revert expiration tests * fix updateTTL * rm log * resolve comment * fix delete version object * fix S3Versioning * fix delete on FindEntry * fix delete chunks * fix sqlite not support concurrent writes/reads * move deletion out of listing transaction; delete entries and empty folders * Revert "fix sqlite not support concurrent writes/reads" This reverts commit 5d5da14e0ed91c613fe5c0ed058f58bb04fba6f0. * clearer handling on recursive empty directory deletion * handle listing errors * struct copying * reuse code to delete empty folders * use iterative approach with a queue to avoid recursive WithFilerClient calls * the way to stop a gRPC stream from the client-side callback is to return a specific error, e.g., io.EOF * still issue UpdateEntry when the flag must be added * errors join * join path * cleaner * add context, sort directories by depth (deepest first) to avoid redundant checks * batched operation, refactoring * prevent deleting bucket * constant * reuse code * more logging * refactoring * s3 TTL time * Safety check --------- Co-authored-by: chrislu <chris.lu@gmail.com>
This commit is contained in:
committed by
GitHub
parent
cc444b1868
commit
084b377f87
@@ -351,37 +351,162 @@ func (f *Filer) FindEntry(ctx context.Context, p util.FullPath) (entry *Entry, e
|
||||
}
|
||||
entry, err = f.Store.FindEntry(ctx, p)
|
||||
if entry != nil && entry.TtlSec > 0 {
|
||||
if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) {
|
||||
if entry.IsExpireS3Enabled() {
|
||||
if entry.GetS3ExpireTime().Before(time.Now()) && !entry.IsS3Versioning() {
|
||||
if delErr := f.doDeleteEntryMetaAndData(ctx, entry, true, false, nil); delErr != nil {
|
||||
glog.ErrorfCtx(ctx, "FindEntry doDeleteEntryMetaAndData %s failed: %v", entry.FullPath, delErr)
|
||||
}
|
||||
return nil, filer_pb.ErrNotFound
|
||||
}
|
||||
} else if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) {
|
||||
f.Store.DeleteOneEntry(ctx, entry)
|
||||
return nil, filer_pb.ErrNotFound
|
||||
}
|
||||
}
|
||||
return
|
||||
|
||||
return entry, err
|
||||
}
|
||||
|
||||
func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (expiredCount int64, lastFileName string, err error) {
|
||||
// Collect expired entries during iteration to avoid deadlock with DB connection pool
|
||||
var expiredEntries []*Entry
|
||||
var s3ExpiredEntries []*Entry
|
||||
var hasValidEntries bool
|
||||
|
||||
lastFileName, err = f.Store.ListDirectoryPrefixedEntries(ctx, p, startFileName, inclusive, limit, prefix, func(entry *Entry) bool {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
default:
|
||||
if entry.TtlSec > 0 {
|
||||
if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) {
|
||||
f.Store.DeleteOneEntry(ctx, entry)
|
||||
if entry.IsExpireS3Enabled() {
|
||||
if entry.GetS3ExpireTime().Before(time.Now()) && !entry.IsS3Versioning() {
|
||||
// Collect for deletion after iteration completes to avoid DB deadlock
|
||||
s3ExpiredEntries = append(s3ExpiredEntries, entry)
|
||||
expiredCount++
|
||||
return true
|
||||
}
|
||||
} else if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) {
|
||||
// Collect for deletion after iteration completes to avoid DB deadlock
|
||||
expiredEntries = append(expiredEntries, entry)
|
||||
expiredCount++
|
||||
return true
|
||||
}
|
||||
}
|
||||
// Track that we found at least one valid (non-expired) entry
|
||||
hasValidEntries = true
|
||||
return eachEntryFunc(entry)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return expiredCount, lastFileName, err
|
||||
}
|
||||
|
||||
// Delete expired entries after iteration completes to avoid DB connection deadlock
|
||||
if len(s3ExpiredEntries) > 0 || len(expiredEntries) > 0 {
|
||||
for _, entry := range s3ExpiredEntries {
|
||||
if delErr := f.doDeleteEntryMetaAndData(ctx, entry, true, false, nil); delErr != nil {
|
||||
glog.ErrorfCtx(ctx, "doListDirectoryEntries doDeleteEntryMetaAndData %s failed: %v", entry.FullPath, delErr)
|
||||
}
|
||||
}
|
||||
for _, entry := range expiredEntries {
|
||||
if delErr := f.Store.DeleteOneEntry(ctx, entry); delErr != nil {
|
||||
glog.ErrorfCtx(ctx, "doListDirectoryEntries DeleteOneEntry %s failed: %v", entry.FullPath, delErr)
|
||||
}
|
||||
}
|
||||
|
||||
// After expiring entries, the directory might be empty.
|
||||
// Attempt to clean it up and any empty parent directories.
|
||||
if !hasValidEntries && p != "/" && startFileName == "" {
|
||||
stopAtPath := util.FullPath(f.DirBucketsPath)
|
||||
f.DeleteEmptyParentDirectories(ctx, p, stopAtPath)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// DeleteEmptyParentDirectories recursively checks and deletes parent directories if they become empty.
|
||||
// It stops at root "/" or at stopAtPath (if provided).
|
||||
// This is useful for cleaning up directories after deleting files or expired entries.
|
||||
//
|
||||
// IMPORTANT: For safety, dirPath must be under stopAtPath (when stopAtPath is provided).
|
||||
// This prevents accidental deletion of directories outside the intended scope (e.g., outside bucket paths).
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// // After deleting /bucket/dir/subdir/file.txt, clean up empty parent directories
|
||||
// // but stop at the bucket path
|
||||
// parentPath := util.FullPath("/bucket/dir/subdir")
|
||||
// filer.DeleteEmptyParentDirectories(ctx, parentPath, util.FullPath("/bucket"))
|
||||
//
|
||||
// Example with gRPC client:
|
||||
//
|
||||
// if err := pb_filer_client.WithFilerClient(ctx, func(client filer_pb.SeaweedFilerClient) error {
|
||||
// return filer_pb.Traverse(ctx, filer, parentPath, "", func(entry *filer_pb.Entry) error {
|
||||
// // Process entries...
|
||||
// })
|
||||
// }); err == nil {
|
||||
// filer.DeleteEmptyParentDirectories(ctx, parentPath, stopPath)
|
||||
// }
|
||||
func (f *Filer) DeleteEmptyParentDirectories(ctx context.Context, dirPath util.FullPath, stopAtPath util.FullPath) {
|
||||
if dirPath == "/" || dirPath == stopAtPath {
|
||||
return
|
||||
}
|
||||
|
||||
// Safety check: if stopAtPath is provided, dirPath must be under it (root "/" allows everything)
|
||||
stopStr := string(stopAtPath)
|
||||
if stopAtPath != "" && stopStr != "/" && !strings.HasPrefix(string(dirPath)+"/", stopStr+"/") {
|
||||
glog.V(1).InfofCtx(ctx, "DeleteEmptyParentDirectories: %s is not under %s, skipping", dirPath, stopAtPath)
|
||||
return
|
||||
}
|
||||
|
||||
// Additional safety: prevent deletion of bucket-level directories
|
||||
// This protects /buckets/mybucket from being deleted even if empty
|
||||
baseDepth := strings.Count(f.DirBucketsPath, "/")
|
||||
dirDepth := strings.Count(string(dirPath), "/")
|
||||
if dirDepth <= baseDepth+1 {
|
||||
glog.V(2).InfofCtx(ctx, "DeleteEmptyParentDirectories: skipping deletion of bucket-level directory %s", dirPath)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if directory is empty
|
||||
isEmpty, err := f.IsDirectoryEmpty(ctx, dirPath)
|
||||
if err != nil {
|
||||
glog.V(3).InfofCtx(ctx, "DeleteEmptyParentDirectories: error checking %s: %v", dirPath, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !isEmpty {
|
||||
// Directory is not empty, stop checking upward
|
||||
glog.V(3).InfofCtx(ctx, "DeleteEmptyParentDirectories: directory %s is not empty, stopping cleanup", dirPath)
|
||||
return
|
||||
}
|
||||
|
||||
// Directory is empty, try to delete it
|
||||
glog.V(2).InfofCtx(ctx, "DeleteEmptyParentDirectories: deleting empty directory %s", dirPath)
|
||||
parentDir, _ := dirPath.DirAndName()
|
||||
if dirEntry, findErr := f.FindEntry(ctx, dirPath); findErr == nil {
|
||||
if delErr := f.doDeleteEntryMetaAndData(ctx, dirEntry, false, false, nil); delErr == nil {
|
||||
// Successfully deleted, continue checking upwards
|
||||
f.DeleteEmptyParentDirectories(ctx, util.FullPath(parentDir), stopAtPath)
|
||||
} else {
|
||||
// Failed to delete, stop cleanup
|
||||
glog.V(3).InfofCtx(ctx, "DeleteEmptyParentDirectories: failed to delete %s: %v", dirPath, delErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// IsDirectoryEmpty checks if a directory contains any entries
|
||||
func (f *Filer) IsDirectoryEmpty(ctx context.Context, dirPath util.FullPath) (bool, error) {
|
||||
isEmpty := true
|
||||
_, err := f.Store.ListDirectoryPrefixedEntries(ctx, dirPath, "", true, 1, "", func(entry *Entry) bool {
|
||||
isEmpty = false
|
||||
return false // Stop after first entry
|
||||
})
|
||||
return isEmpty, err
|
||||
}
|
||||
|
||||
func (f *Filer) Shutdown() {
|
||||
close(f.deletionQuit)
|
||||
f.LocalMetaLogBuffer.ShutdownLogBuffer()
|
||||
|
||||
Reference in New Issue
Block a user