* filer: async empty folder cleanup via metadata events Implements asynchronous empty folder cleanup when files are deleted in S3. Key changes: 1. EmptyFolderCleaner - New component that handles folder cleanup: - Uses consistent hashing (LockRing) to determine folder ownership - Each filer owns specific folders, avoiding duplicate cleanup work - Debounces delete events (10s delay) to batch multiple deletes - Caches rough folder counts to skip unnecessary checks - Cancels pending cleanup when new files are created - Handles both file and subdirectory deletions 2. Integration with metadata events: - Listens to both local and remote filer metadata events - Processes create/delete/rename events to track folder state - Only processes folders under /buckets/<bucket>/... 3. Removed synchronous empty folder cleanup from S3 handlers: - DeleteObjectHandler no longer calls DoDeleteEmptyParentDirectories - DeleteMultipleObjectsHandler no longer tracks/cleans directories - Cleanup now happens asynchronously via metadata events Benefits: - Non-blocking: S3 delete requests return immediately - Coordinated: Only one filer (the owner) cleans each folder - Efficient: Batching and caching reduce unnecessary checks - Event-driven: Folder deletion triggers parent folder check automatically * filer: add CleanupQueue data structure for deduplicated folder cleanup CleanupQueue uses a linked list for FIFO ordering and a hashmap for O(1) deduplication. 
Processing is triggered when: - Queue size reaches maxSize (default 1000), OR - Oldest item exceeds maxAge (default 10 minutes) Key features: - O(1) Add, Remove, Pop, Contains operations - Duplicate folders are ignored (keeps original position/time) - Testable with injectable time function - Thread-safe with mutex protection * filer: use CleanupQueue for empty folder cleanup Replace timer-per-folder approach with queue-based processing: - Use CleanupQueue for deduplication and ordered processing - Process queue when full (1000 items) or oldest item exceeds 10 minutes - Background processor checks queue every 10 seconds - Remove from queue on create events to cancel pending cleanup Benefits: - Bounded memory: queue has max size, not unlimited timers - Efficient: O(1) add/remove/contains operations - Batch processing: handle many folders efficiently - Better for high-volume delete scenarios * filer: CleanupQueue.Add moves duplicate to back with updated time When adding a folder that already exists in the queue: - Remove it from its current position - Add it to the back of the queue - Update the queue time to current time This ensures that folders with recent delete activity are processed later, giving more time for additional deletes to occur. * filer: CleanupQueue uses event time and inserts in sorted order Changes: - Add() now takes eventTime parameter instead of using current time - Insert items in time-sorted order (oldest at front) to handle out-of-order events - When updating duplicate with newer time, reposition to maintain sort order - Ignore updates with older time (keep existing later time) This ensures proper ordering when processing events from distributed filers where event arrival order may not match event occurrence order. 
* filer: remove unused CleanupQueue functions (SetNowFunc, GetAll) Removed test-only functions: - SetNowFunc: tests now use real time with past event times - GetAll: tests now use Pop() to verify order Kept functions used in production: - Peek: used in filer_notify_read.go - OldestAge: used in empty_folder_cleaner.go logging * filer: initialize cache entry on first delete/create event Previously, roughCount was only updated if the cache entry already existed, but entries were only created during executeCleanup. This meant delete/create events before the first cleanup didn't track the count. Now create the cache entry on first event, so roughCount properly tracks all changes from the start. * filer: skip adding to cleanup queue if roughCount > 0 If the cached roughCount indicates there are still items in the folder, don't bother adding it to the cleanup queue. This avoids unnecessary queue entries and reduces wasted cleanup checks. * filer: don't create cache entry on create event Only update roughCount if the folder is already being tracked. New folders don't need tracking until we see a delete event. * filer: move empty folder cleanup to its own package - Created weed/filer/empty_folder_cleanup package - Defined FilerOperations interface to break circular dependency - Added CountDirectoryEntries method to Filer - Exported IsUnderPath and IsUnderBucketPath helper functions * filer: make isUnderPath and isUnderBucketPath private These helpers are only used within the empty_folder_cleanup package.
135 lines
3.8 KiB
Go
135 lines
3.8 KiB
Go
package filer
|
|
|
|
import (
|
|
"bytes"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
// onMetadataChangeEvent is triggered after filer processed change events from local or remote filers.
// It fans one event out to every subsystem that reacts to metadata changes:
// filer configuration reload, remote-storage mapping reload, bucket create/delete
// tracking, and the async empty-folder cleanup. Handlers are invoked in this fixed
// order; each one filters for the directories/entries it cares about.
func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse) {
	f.maybeReloadFilerConfiguration(event)
	f.maybeReloadRemoteStorageConfigurationAndMapping(event)
	f.onBucketEvents(event)
	f.onEmptyFolderCleanupEvents(event)
}
|
|
|
|
func (f *Filer) onBucketEvents(event *filer_pb.SubscribeMetadataResponse) {
|
|
message := event.EventNotification
|
|
|
|
if f.DirBucketsPath == event.Directory {
|
|
if filer_pb.IsCreate(event) {
|
|
if message.NewEntry.IsDirectory {
|
|
f.Store.OnBucketCreation(message.NewEntry.Name)
|
|
}
|
|
}
|
|
if filer_pb.IsDelete(event) {
|
|
if message.OldEntry.IsDirectory {
|
|
f.Store.OnBucketDeletion(message.OldEntry.Name)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// onEmptyFolderCleanupEvents handles create/delete events for empty folder cleanup
|
|
func (f *Filer) onEmptyFolderCleanupEvents(event *filer_pb.SubscribeMetadataResponse) {
|
|
if f.EmptyFolderCleaner == nil || !f.EmptyFolderCleaner.IsEnabled() {
|
|
return
|
|
}
|
|
|
|
message := event.EventNotification
|
|
directory := event.Directory
|
|
eventTime := time.Unix(0, event.TsNs)
|
|
|
|
// Handle delete events - trigger folder cleanup check
|
|
if filer_pb.IsDelete(event) && message.OldEntry != nil {
|
|
f.EmptyFolderCleaner.OnDeleteEvent(directory, message.OldEntry.Name, message.OldEntry.IsDirectory, eventTime)
|
|
}
|
|
|
|
// Handle create events - cancel pending cleanup for the folder
|
|
if filer_pb.IsCreate(event) && message.NewEntry != nil {
|
|
f.EmptyFolderCleaner.OnCreateEvent(directory, message.NewEntry.Name, message.NewEntry.IsDirectory)
|
|
}
|
|
|
|
// Handle rename/move events
|
|
if filer_pb.IsRename(event) {
|
|
// Treat the old location as a delete
|
|
if message.OldEntry != nil {
|
|
f.EmptyFolderCleaner.OnDeleteEvent(directory, message.OldEntry.Name, message.OldEntry.IsDirectory, eventTime)
|
|
}
|
|
// Treat the new location as a create
|
|
if message.NewEntry != nil {
|
|
newDir := message.NewParentPath
|
|
if newDir == "" {
|
|
newDir = directory
|
|
}
|
|
f.EmptyFolderCleaner.OnCreateEvent(newDir, message.NewEntry.Name, message.NewEntry.IsDirectory)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (f *Filer) maybeReloadFilerConfiguration(event *filer_pb.SubscribeMetadataResponse) {
|
|
if DirectoryEtcSeaweedFS != event.Directory {
|
|
if DirectoryEtcSeaweedFS != event.EventNotification.NewParentPath {
|
|
return
|
|
}
|
|
}
|
|
|
|
entry := event.EventNotification.NewEntry
|
|
if entry == nil {
|
|
return
|
|
}
|
|
|
|
glog.V(0).Infof("procesing %v", event)
|
|
if entry.Name == FilerConfName {
|
|
f.reloadFilerConfiguration(entry)
|
|
}
|
|
}
|
|
|
|
func (f *Filer) readEntry(chunks []*filer_pb.FileChunk, size uint64) ([]byte, error) {
|
|
var buf bytes.Buffer
|
|
err := StreamContent(f.MasterClient, &buf, chunks, 0, int64(size))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return buf.Bytes(), nil
|
|
}
|
|
|
|
func (f *Filer) reloadFilerConfiguration(entry *filer_pb.Entry) {
|
|
fc := NewFilerConf()
|
|
err := fc.loadFromChunks(f, entry.Content, entry.GetChunks(), FileSize(entry))
|
|
if err != nil {
|
|
glog.Errorf("read filer conf chunks: %v", err)
|
|
return
|
|
}
|
|
f.FilerConf = fc
|
|
}
|
|
|
|
func (f *Filer) LoadFilerConf() {
|
|
fc := NewFilerConf()
|
|
err := util.Retry("loadFilerConf", func() error {
|
|
return fc.loadFromFiler(f)
|
|
})
|
|
if err != nil {
|
|
glog.Errorf("read filer conf: %v", err)
|
|
return
|
|
}
|
|
f.FilerConf = fc
|
|
}
|
|
|
|
// //////////////////////////////////
|
|
// load and maintain remote storages
|
|
// //////////////////////////////////
|
|
func (f *Filer) LoadRemoteStorageConfAndMapping() {
|
|
if err := f.RemoteStorage.LoadRemoteStorageConfigurationsAndMapping(f); err != nil {
|
|
glog.Errorf("read remote conf and mapping: %v", err)
|
|
return
|
|
}
|
|
}
|
|
// maybeReloadRemoteStorageConfigurationAndMapping is a placeholder for reacting
// to metadata change events on the remote storage configuration and mapping.
// It is intentionally a no-op for now; callers invoke it from
// onMetadataChangeEvent so the hook point already exists.
func (f *Filer) maybeReloadRemoteStorageConfigurationAndMapping(event *filer_pb.SubscribeMetadataResponse) {
	// FIXME add reloading
}
|