Switch empty-folder cleanup to bucket policy (#8292)

* Fix Spark _temporary cleanup and add issue #8285 regression test

* Generalize empty folder cleanup for Spark temp artifacts

* Revert synchronous folder pruning and add cleanup diagnostics

* Add actionable empty-folder cleanup diagnostics

* Fix Spark temp marker cleanup in async folder cleaner

* Fix Spark temp cleanup with implicit directory markers

* Keep explicit directory markers non-implicit

* logging

* more logs

* Switch empty-folder cleanup to bucket policy

* Seaweed-X-Amz-Allow-Empty-Folders

* less logs

* go vet

* less logs

* refactoring
This commit is contained in:
Chris Lu
2026-02-10 18:38:38 -08:00
committed by GitHub
parent 5c365e7090
commit b57429ef2e
16 changed files with 798 additions and 157 deletions

View File

@@ -21,8 +21,9 @@ type CleanupQueue struct {
// queueItem represents an item in the cleanup queue
type queueItem struct {
folder string
queueTime time.Time
folder string
triggeredBy string
queueTime time.Time
}
// NewCleanupQueue creates a new CleanupQueue with the specified limits
@@ -39,7 +40,7 @@ func NewCleanupQueue(maxSize int, maxAge time.Duration) *CleanupQueue {
// The item is inserted in time-sorted order (oldest at front) to handle out-of-order events.
// If folder already exists with an older time, the time is updated and position adjusted.
// Returns true if the folder was newly added, false if it was updated.
func (q *CleanupQueue) Add(folder string, eventTime time.Time) bool {
func (q *CleanupQueue) Add(folder string, triggeredBy string, eventTime time.Time) bool {
q.mu.Lock()
defer q.mu.Unlock()
@@ -51,23 +52,24 @@ func (q *CleanupQueue) Add(folder string, eventTime time.Time) bool {
// Remove from current position
q.items.Remove(elem)
// Re-insert with new time in sorted position
newElem := q.insertSorted(folder, eventTime)
newElem := q.insertSorted(folder, triggeredBy, eventTime)
q.itemsMap[folder] = newElem
}
return false
}
// Insert new folder in sorted position
elem := q.insertSorted(folder, eventTime)
elem := q.insertSorted(folder, triggeredBy, eventTime)
q.itemsMap[folder] = elem
return true
}
// insertSorted inserts an item in the correct position to maintain time ordering (oldest at front)
func (q *CleanupQueue) insertSorted(folder string, eventTime time.Time) *list.Element {
func (q *CleanupQueue) insertSorted(folder string, triggeredBy string, eventTime time.Time) *list.Element {
item := &queueItem{
folder: folder,
queueTime: eventTime,
folder: folder,
triggeredBy: triggeredBy,
queueTime: eventTime,
}
// Find the correct position (insert before the first item with a later time)
@@ -135,35 +137,35 @@ func (q *CleanupQueue) shouldProcessLocked() bool {
// Pop removes and returns the oldest folder from the queue.
// Returns the folder and true if an item was available, or empty string and false if queue is empty.
func (q *CleanupQueue) Pop() (string, bool) {
func (q *CleanupQueue) Pop() (string, string, bool) {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return "", false
return "", "", false
}
item := front.Value.(*queueItem)
q.items.Remove(front)
delete(q.itemsMap, item.folder)
return item.folder, true
return item.folder, item.triggeredBy, true
}
// Peek returns the oldest folder without removing it.
// Returns the folder and queue time if available, or empty values if queue is empty.
func (q *CleanupQueue) Peek() (folder string, queueTime time.Time, ok bool) {
func (q *CleanupQueue) Peek() (folder string, triggeredBy string, queueTime time.Time, ok bool) {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return "", time.Time{}, false
return "", "", time.Time{}, false
}
item := front.Value.(*queueItem)
return item.folder, item.queueTime, true
return item.folder, item.triggeredBy, item.queueTime, true
}
// Len returns the current queue size.

View File

@@ -10,7 +10,7 @@ func TestCleanupQueue_Add(t *testing.T) {
now := time.Now()
// Add first item
if !q.Add("/buckets/b1/folder1", now) {
if !q.Add("/buckets/b1/folder1", "item1", now) {
t.Error("expected Add to return true for new item")
}
if q.Len() != 1 {
@@ -18,7 +18,7 @@ func TestCleanupQueue_Add(t *testing.T) {
}
// Add second item with later time
if !q.Add("/buckets/b1/folder2", now.Add(1*time.Second)) {
if !q.Add("/buckets/b1/folder2", "item2", now.Add(1*time.Second)) {
t.Error("expected Add to return true for new item")
}
if q.Len() != 2 {
@@ -26,7 +26,7 @@ func TestCleanupQueue_Add(t *testing.T) {
}
// Add duplicate with newer time - should update and reposition
if q.Add("/buckets/b1/folder1", now.Add(2*time.Second)) {
if q.Add("/buckets/b1/folder1", "item1-updated", now.Add(2*time.Second)) {
t.Error("expected Add to return false for existing item")
}
if q.Len() != 2 {
@@ -34,11 +34,14 @@ func TestCleanupQueue_Add(t *testing.T) {
}
// folder1 should now be at the back (newer time) - verify by popping
folder1, _ := q.Pop()
folder2, _ := q.Pop()
folder1, triggered1, _ := q.Pop()
folder2, triggered2, _ := q.Pop()
if folder1 != "/buckets/b1/folder2" || folder2 != "/buckets/b1/folder1" {
t.Errorf("expected folder1 to be moved to back, got %s, %s", folder1, folder2)
}
if triggered1 != "item2" || triggered2 != "item1-updated" {
t.Errorf("triggeredBy mismatch: got %s, %s", triggered1, triggered2)
}
}
func TestCleanupQueue_Add_OutOfOrder(t *testing.T) {
@@ -46,16 +49,23 @@ func TestCleanupQueue_Add_OutOfOrder(t *testing.T) {
baseTime := time.Now()
// Add items out of order
q.Add("/buckets/b1/folder3", baseTime.Add(3*time.Second))
q.Add("/buckets/b1/folder1", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder2", baseTime.Add(2*time.Second))
q.Add("/buckets/b1/folder3", "f3", baseTime.Add(3*time.Second))
q.Add("/buckets/b1/folder1", "f1", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder2", "f2", baseTime.Add(2*time.Second))
// Items should be in time order (oldest first) - verify by popping
expected := []string{"/buckets/b1/folder1", "/buckets/b1/folder2", "/buckets/b1/folder3"}
expected := []struct {
folder string
item string
}{
{"/buckets/b1/folder1", "f1"},
{"/buckets/b1/folder2", "f2"},
{"/buckets/b1/folder3", "f3"},
}
for i, exp := range expected {
folder, ok := q.Pop()
if !ok || folder != exp {
t.Errorf("at index %d: expected %s, got %s", i, exp, folder)
folder, item, ok := q.Pop()
if !ok || folder != exp.folder || item != exp.item {
t.Errorf("at index %d: expected %s by %s, got %s by %s", i, exp.folder, exp.item, folder, item)
}
}
}
@@ -65,15 +75,15 @@ func TestCleanupQueue_Add_DuplicateWithOlderTime(t *testing.T) {
baseTime := time.Now()
// Add folder at t=5
q.Add("/buckets/b1/folder1", baseTime.Add(5*time.Second))
q.Add("/buckets/b1/folder1", "t5", baseTime.Add(5*time.Second))
// Try to add same folder with older time - should NOT update
q.Add("/buckets/b1/folder1", baseTime.Add(2*time.Second))
q.Add("/buckets/b1/folder1", "t2", baseTime.Add(2*time.Second))
// Time should remain at t=5
_, queueTime, _ := q.Peek()
if queueTime != baseTime.Add(5*time.Second) {
t.Errorf("expected time to remain unchanged, got %v", queueTime)
// Time and triggeredBy should remain at t=5
_, triggered, queueTime, _ := q.Peek()
if queueTime != baseTime.Add(5*time.Second) || triggered != "t5" {
t.Errorf("expected time to remain unchanged, got %v and %s", queueTime, triggered)
}
}
@@ -81,9 +91,9 @@ func TestCleanupQueue_Remove(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
q.Add("/buckets/b1/folder1", "i1", now)
q.Add("/buckets/b1/folder2", "i2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", "i3", now.Add(2*time.Second))
// Remove middle item
if !q.Remove("/buckets/b1/folder2") {
@@ -102,8 +112,8 @@ func TestCleanupQueue_Remove(t *testing.T) {
}
// Verify order is preserved by popping
folder1, _ := q.Pop()
folder3, _ := q.Pop()
folder1, _, _ := q.Pop()
folder3, _, _ := q.Pop()
if folder1 != "/buckets/b1/folder1" || folder3 != "/buckets/b1/folder3" {
t.Errorf("unexpected order: %s, %s", folder1, folder3)
}
@@ -114,30 +124,30 @@ func TestCleanupQueue_Pop(t *testing.T) {
now := time.Now()
// Pop from empty queue
folder, ok := q.Pop()
folder, item, ok := q.Pop()
if ok {
t.Error("expected Pop to return false for empty queue")
}
if folder != "" {
t.Errorf("expected empty folder, got %s", folder)
if folder != "" || item != "" {
t.Errorf("expected empty results, got %s by %s", folder, item)
}
// Add items and pop in order
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
q.Add("/buckets/b1/folder1", "i1", now)
q.Add("/buckets/b1/folder2", "i2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", "i3", now.Add(2*time.Second))
folder, ok = q.Pop()
folder, _, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder1" {
t.Errorf("expected folder1, got %s (ok=%v)", folder, ok)
}
folder, ok = q.Pop()
folder, _, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder2" {
t.Errorf("expected folder2, got %s (ok=%v)", folder, ok)
}
folder, ok = q.Pop()
folder, _, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder3" {
t.Errorf("expected folder3, got %s (ok=%v)", folder, ok)
}
@@ -153,16 +163,16 @@ func TestCleanupQueue_Peek(t *testing.T) {
now := time.Now()
// Peek empty queue
folder, _, ok := q.Peek()
folder, item, _, ok := q.Peek()
if ok {
t.Error("expected Peek to return false for empty queue")
}
// Add item and peek
q.Add("/buckets/b1/folder1", now)
folder, queueTime, ok := q.Peek()
if !ok || folder != "/buckets/b1/folder1" {
t.Errorf("expected folder1, got %s (ok=%v)", folder, ok)
q.Add("/buckets/b1/folder1", "i1", now)
folder, item, queueTime, ok := q.Peek()
if !ok || folder != "/buckets/b1/folder1" || item != "i1" {
t.Errorf("expected folder1 by i1, got %s by %s (ok=%v)", folder, item, ok)
}
if queueTime != now {
t.Errorf("expected queue time %v, got %v", now, queueTime)
@@ -178,7 +188,7 @@ func TestCleanupQueue_Contains(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder1", "i1", now)
if !q.Contains("/buckets/b1/folder1") {
t.Error("expected Contains to return true")
@@ -198,14 +208,14 @@ func TestCleanupQueue_ShouldProcess_MaxSize(t *testing.T) {
}
// Add items below max
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder1", "i1", now)
q.Add("/buckets/b1/folder2", "i2", now.Add(1*time.Second))
if q.ShouldProcess() {
t.Error("queue below max should not need processing")
}
// Add item to reach max
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
q.Add("/buckets/b1/folder3", "i3", now.Add(2*time.Second))
if !q.ShouldProcess() {
t.Error("queue at max should need processing")
}
@@ -216,7 +226,7 @@ func TestCleanupQueue_ShouldProcess_MaxAge(t *testing.T) {
// Add item with old event time
oldTime := time.Now().Add(-1 * time.Second) // 1 second ago
q.Add("/buckets/b1/folder1", oldTime)
q.Add("/buckets/b1/folder1", "i1", oldTime)
// Item is older than maxAge, should need processing
if !q.ShouldProcess() {
@@ -225,7 +235,7 @@ func TestCleanupQueue_ShouldProcess_MaxAge(t *testing.T) {
// Clear and add fresh item
q.Clear()
q.Add("/buckets/b1/folder2", time.Now())
q.Add("/buckets/b1/folder2", "i2", time.Now())
// Fresh item should not trigger processing
if q.ShouldProcess() {
@@ -237,9 +247,9 @@ func TestCleanupQueue_Clear(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
q.Add("/buckets/b1/folder1", "i1", now)
q.Add("/buckets/b1/folder2", "i2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", "i3", now.Add(2*time.Second))
q.Clear()
@@ -261,7 +271,7 @@ func TestCleanupQueue_OldestAge(t *testing.T) {
// Add item with time in the past
oldTime := time.Now().Add(-5 * time.Minute)
q.Add("/buckets/b1/folder1", oldTime)
q.Add("/buckets/b1/folder1", "i1", oldTime)
// Age should be approximately 5 minutes
age := q.OldestAge()
@@ -283,12 +293,12 @@ func TestCleanupQueue_TimeOrder(t *testing.T) {
"/buckets/b1/e",
}
for i, item := range items {
q.Add(item, baseTime.Add(time.Duration(i)*time.Second))
q.Add(item, "i", baseTime.Add(time.Duration(i)*time.Second))
}
// Pop should return in time order
for i, expected := range items {
got, ok := q.Pop()
got, _, ok := q.Pop()
if !ok {
t.Errorf("Pop %d: expected item, got empty", i)
}
@@ -303,17 +313,17 @@ func TestCleanupQueue_DuplicateWithNewerTime(t *testing.T) {
baseTime := time.Now()
// Add items
q.Add("/buckets/b1/folder1", baseTime)
q.Add("/buckets/b1/folder2", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder3", baseTime.Add(2*time.Second))
q.Add("/buckets/b1/folder1", "i1", baseTime)
q.Add("/buckets/b1/folder2", "i2", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder3", "i3", baseTime.Add(2*time.Second))
// Add duplicate with newer time - should update and reposition
q.Add("/buckets/b1/folder1", baseTime.Add(3*time.Second))
q.Add("/buckets/b1/folder1", "i1-new", baseTime.Add(3*time.Second))
// folder1 should now be at the back (newest time) - verify by popping
expected := []string{"/buckets/b1/folder2", "/buckets/b1/folder3", "/buckets/b1/folder1"}
for i, exp := range expected {
folder, ok := q.Pop()
folder, _, ok := q.Pop()
if !ok || folder != exp {
t.Errorf("at index %d: expected %s, got %s", i, exp, folder)
}
@@ -328,7 +338,7 @@ func TestCleanupQueue_Concurrent(t *testing.T) {
// Concurrent adds
go func() {
for i := 0; i < 100; i++ {
q.Add("/buckets/b1/folder"+string(rune('A'+i%26)), now.Add(time.Duration(i)*time.Millisecond))
q.Add("/buckets/b1/folder"+string(rune('A'+i%26)), "item", now.Add(time.Duration(i)*time.Millisecond))
}
done <- true
}()

View File

@@ -18,7 +18,7 @@ const (
DefaultMaxCountCheck = 1000
DefaultCacheExpiry = 5 * time.Minute
DefaultQueueMaxSize = 1000
DefaultQueueMaxAge = 10 * time.Minute
DefaultQueueMaxAge = 5 * time.Second
DefaultProcessorSleep = 10 * time.Second // How often to check queue
)
@@ -32,12 +32,17 @@ type FilerOperations interface {
// folderState tracks the state of a folder for empty folder cleanup
type folderState struct {
roughCount int // Cached rough count (up to maxCountCheck)
isImplicit *bool // Tri-state boolean: nil (unknown), true (implicit), false (explicit)
lastAddTime time.Time // Last time an item was added
lastDelTime time.Time // Last time an item was deleted
lastCheck time.Time // Last time we checked the actual count
}
type bucketCleanupPolicyState struct {
autoRemove bool
attrValue string
lastCheck time.Time
}
// EmptyFolderCleaner handles asynchronous cleanup of empty folders
// Each filer owns specific folders via consistent hashing based on the peer filer list
type EmptyFolderCleaner struct {
@@ -46,8 +51,9 @@ type EmptyFolderCleaner struct {
host pb.ServerAddress
// Folder state tracking
mu sync.RWMutex
folderCounts map[string]*folderState // Rough count cache
mu sync.RWMutex
folderCounts map[string]*folderState // Rough count cache
bucketCleanupPolicies map[string]*bucketCleanupPolicyState // bucket path -> cleanup policy cache
// Cleanup queue (thread-safe, has its own lock)
cleanupQueue *CleanupQueue
@@ -66,17 +72,18 @@ type EmptyFolderCleaner struct {
// NewEmptyFolderCleaner creates a new EmptyFolderCleaner
func NewEmptyFolderCleaner(filer FilerOperations, lockRing *lock_manager.LockRing, host pb.ServerAddress, bucketPath string) *EmptyFolderCleaner {
efc := &EmptyFolderCleaner{
filer: filer,
lockRing: lockRing,
host: host,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(DefaultQueueMaxSize, DefaultQueueMaxAge),
maxCountCheck: DefaultMaxCountCheck,
cacheExpiry: DefaultCacheExpiry,
processorSleep: DefaultProcessorSleep,
bucketPath: bucketPath,
enabled: true,
stopCh: make(chan struct{}),
filer: filer,
lockRing: lockRing,
host: host,
folderCounts: make(map[string]*folderState),
bucketCleanupPolicies: make(map[string]*bucketCleanupPolicyState),
cleanupQueue: NewCleanupQueue(DefaultQueueMaxSize, DefaultQueueMaxAge),
maxCountCheck: DefaultMaxCountCheck,
cacheExpiry: DefaultCacheExpiry,
processorSleep: DefaultProcessorSleep,
bucketPath: bucketPath,
enabled: true,
stopCh: make(chan struct{}),
}
go efc.cacheEvictionLoop()
go efc.cleanupProcessor()
@@ -162,8 +169,8 @@ func (efc *EmptyFolderCleaner) OnDeleteEvent(directory string, entryName string,
}
// Add to cleanup queue with event time (handles out-of-order events)
if efc.cleanupQueue.Add(directory, eventTime) {
glog.V(3).Infof("EmptyFolderCleaner: queued %s for cleanup", directory)
if efc.cleanupQueue.Add(directory, entryName, eventTime) {
glog.V(3).Infof("EmptyFolderCleaner: queued %s for cleanup (triggered by %s)", directory, entryName)
}
}
@@ -214,6 +221,10 @@ func (efc *EmptyFolderCleaner) cleanupProcessor() {
func (efc *EmptyFolderCleaner) processCleanupQueue() {
// Check if we should process
if !efc.cleanupQueue.ShouldProcess() {
if efc.cleanupQueue.Len() > 0 {
glog.Infof("EmptyFolderCleaner: pending queue not processed yet (len=%d, oldest_age=%v, max_size=%d, max_age=%v)",
efc.cleanupQueue.Len(), efc.cleanupQueue.OldestAge(), efc.cleanupQueue.maxSize, efc.cleanupQueue.maxAge)
}
return
}
@@ -228,35 +239,30 @@ func (efc *EmptyFolderCleaner) processCleanupQueue() {
}
// Pop the oldest item
folder, ok := efc.cleanupQueue.Pop()
folder, triggeredBy, ok := efc.cleanupQueue.Pop()
if !ok {
break
}
// Execute cleanup for this folder
efc.executeCleanup(folder)
// If queue is no longer full and oldest item is not old enough, stop processing
if !efc.cleanupQueue.ShouldProcess() {
break
}
efc.executeCleanup(folder, triggeredBy)
}
}
// executeCleanup performs the actual cleanup of an empty folder
func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
func (efc *EmptyFolderCleaner) executeCleanup(folder string, triggeredBy string) {
efc.mu.Lock()
// Quick check: if we have cached count and it's > 0, skip
if state, exists := efc.folderCounts[folder]; exists {
if state.roughCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: skipping %s, cached count=%d", folder, state.roughCount)
glog.V(3).Infof("EmptyFolderCleaner: skipping %s (triggered by %s), cached count=%d", folder, triggeredBy, state.roughCount)
efc.mu.Unlock()
return
}
// If there was an add after our delete, skip
if !state.lastAddTime.IsZero() && state.lastAddTime.After(state.lastDelTime) {
glog.V(3).Infof("EmptyFolderCleaner: skipping %s, add happened after delete", folder)
glog.V(3).Infof("EmptyFolderCleaner: skipping %s (triggered by %s), add happened after delete", folder, triggeredBy)
efc.mu.Unlock()
return
}
@@ -265,47 +271,23 @@ func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
// Re-check ownership (topology might have changed)
if !efc.ownsFolder(folder) {
glog.V(3).Infof("EmptyFolderCleaner: no longer owner of %s, skipping", folder)
glog.V(3).Infof("EmptyFolderCleaner: no longer owner of %s (triggered by %s), skipping", folder, triggeredBy)
return
}
// Check for explicit implicit_dir attribute
// First check cache
ctx := context.Background()
efc.mu.RLock()
var cachedImplicit *bool
if state, exists := efc.folderCounts[folder]; exists {
cachedImplicit = state.isImplicit
}
efc.mu.RUnlock()
var isImplicit bool
if cachedImplicit != nil {
isImplicit = *cachedImplicit
} else {
// Not cached, check filer
attrs, err := efc.filer.GetEntryAttributes(ctx, util.FullPath(folder))
if err != nil {
if err == filer_pb.ErrNotFound {
return
}
glog.V(2).Infof("EmptyFolderCleaner: error getting attributes for %s: %v", folder, err)
bucketPath, autoRemove, source, attrValue, err := efc.getBucketCleanupPolicy(ctx, folder)
if err != nil {
if err == filer_pb.ErrNotFound {
return
}
isImplicit = attrs != nil && string(attrs[s3_constants.ExtS3ImplicitDir]) == "true"
// Update cache
efc.mu.Lock()
if _, exists := efc.folderCounts[folder]; !exists {
efc.folderCounts[folder] = &folderState{}
}
efc.folderCounts[folder].isImplicit = &isImplicit
efc.mu.Unlock()
glog.V(2).Infof("EmptyFolderCleaner: failed to load bucket cleanup policy for folder %s (triggered by %s): %v", folder, triggeredBy, err)
return
}
if !isImplicit {
glog.V(4).Infof("EmptyFolderCleaner: folder %s is not marked as implicit, skipping", folder)
if !autoRemove {
glog.V(3).Infof("EmptyFolderCleaner: skipping folder %s (triggered by %s), bucket %s auto-remove-empty-folders disabled (source=%s attr=%s)",
folder, triggeredBy, bucketPath, source, attrValue)
return
}
@@ -326,14 +308,14 @@ func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
efc.mu.Unlock()
if count > 0 {
glog.V(3).Infof("EmptyFolderCleaner: folder %s has %d items, not empty", folder, count)
glog.Infof("EmptyFolderCleaner: folder %s (triggered by %s) has %d items, not empty", folder, triggeredBy, count)
return
}
// Delete the empty folder
glog.V(2).Infof("EmptyFolderCleaner: deleting empty folder %s", folder)
glog.Infof("EmptyFolderCleaner: deleting empty folder %s (triggered by %s)", folder, triggeredBy)
if err := efc.deleteFolder(ctx, folder); err != nil {
glog.V(2).Infof("EmptyFolderCleaner: failed to delete empty folder %s: %v", folder, err)
glog.V(2).Infof("EmptyFolderCleaner: failed to delete empty folder %s (triggered by %s): %v", folder, triggeredBy, err)
return
}
@@ -357,6 +339,60 @@ func (efc *EmptyFolderCleaner) deleteFolder(ctx context.Context, folder string)
return efc.filer.DeleteEntryMetaAndData(ctx, util.FullPath(folder), false, false, false, false, nil, 0)
}
func (efc *EmptyFolderCleaner) getBucketCleanupPolicy(ctx context.Context, folder string) (bucketPath string, autoRemove bool, source string, attrValue string, err error) {
bucketPath, ok := util.ExtractBucketPath(efc.bucketPath, folder, true)
if !ok {
return "", true, "default", "<not_bucket_path>", nil
}
now := time.Now()
efc.mu.RLock()
if state, found := efc.bucketCleanupPolicies[bucketPath]; found && now.Sub(state.lastCheck) <= efc.cacheExpiry {
efc.mu.RUnlock()
return bucketPath, state.autoRemove, "cache", state.attrValue, nil
}
efc.mu.RUnlock()
attrs, err := efc.filer.GetEntryAttributes(ctx, util.FullPath(bucketPath))
if err != nil {
return "", true, "", "", err
}
autoRemove, attrValue = autoRemoveEmptyFoldersEnabled(attrs)
efc.mu.Lock()
if efc.bucketCleanupPolicies == nil {
efc.bucketCleanupPolicies = make(map[string]*bucketCleanupPolicyState)
}
efc.bucketCleanupPolicies[bucketPath] = &bucketCleanupPolicyState{
autoRemove: autoRemove,
attrValue: attrValue,
lastCheck: now,
}
efc.mu.Unlock()
return bucketPath, autoRemove, "filer", attrValue, nil
}
func autoRemoveEmptyFoldersEnabled(attrs map[string][]byte) (bool, string) {
if attrs == nil {
return true, "<no_attrs>"
}
value, found := attrs[s3_constants.ExtAllowEmptyFolders]
if !found {
return true, "<missing>"
}
text := strings.TrimSpace(string(value))
if text == "" {
return true, "<empty>"
}
return !strings.EqualFold(text, "true"), text
}
// isUnderPath checks if child is under parent path
func isUnderPath(child, parent string) bool {
if parent == "" || parent == "/" {
@@ -445,6 +481,12 @@ func (efc *EmptyFolderCleaner) evictStaleCacheEntries() {
}
}
for bucketPath, state := range efc.bucketCleanupPolicies {
if now.Sub(state.lastCheck) > efc.cacheExpiry {
delete(efc.bucketCleanupPolicies, bucketPath)
}
}
if expiredCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: evicted %d stale cache entries", expiredCount)
}
@@ -460,6 +502,7 @@ func (efc *EmptyFolderCleaner) Stop() {
efc.enabled = false
efc.cleanupQueue.Clear()
efc.folderCounts = make(map[string]*folderState) // Clear cache on stop
efc.bucketCleanupPolicies = make(map[string]*bucketCleanupPolicyState)
}
// GetPendingCleanupCount returns the number of pending cleanup tasks (for testing)

View File

@@ -1,13 +1,43 @@
package empty_folder_cleanup
import (
"context"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/util"
)
type mockFilerOps struct {
countFn func(path util.FullPath) (int, error)
deleteFn func(path util.FullPath) error
attrsFn func(path util.FullPath) (map[string][]byte, error)
}
func (m *mockFilerOps) CountDirectoryEntries(_ context.Context, dirPath util.FullPath, _ int) (int, error) {
if m.countFn == nil {
return 0, nil
}
return m.countFn(dirPath)
}
func (m *mockFilerOps) DeleteEntryMetaAndData(_ context.Context, p util.FullPath, _, _, _, _ bool, _ []int32, _ int64) error {
if m.deleteFn == nil {
return nil
}
return m.deleteFn(p)
}
func (m *mockFilerOps) GetEntryAttributes(_ context.Context, p util.FullPath) (map[string][]byte, error) {
if m.attrsFn == nil {
return nil, nil
}
return m.attrsFn(p)
}
func Test_isUnderPath(t *testing.T) {
tests := []struct {
name string
@@ -66,6 +96,56 @@ func Test_isUnderBucketPath(t *testing.T) {
}
}
func Test_autoRemoveEmptyFoldersEnabled(t *testing.T) {
tests := []struct {
name string
attrs map[string][]byte
enabled bool
attrValue string
}{
{
name: "no attrs defaults enabled",
attrs: nil,
enabled: true,
attrValue: "<no_attrs>",
},
{
name: "missing key defaults enabled",
attrs: map[string][]byte{},
enabled: true,
attrValue: "<missing>",
},
{
name: "allow-empty disables cleanup",
attrs: map[string][]byte{
s3_constants.ExtAllowEmptyFolders: []byte("true"),
},
enabled: false,
attrValue: "true",
},
{
name: "explicit false keeps cleanup enabled",
attrs: map[string][]byte{
s3_constants.ExtAllowEmptyFolders: []byte("false"),
},
enabled: true,
attrValue: "false",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
enabled, attrValue := autoRemoveEmptyFoldersEnabled(tt.attrs)
if enabled != tt.enabled {
t.Fatalf("expected enabled=%v, got %v", tt.enabled, enabled)
}
if attrValue != tt.attrValue {
t.Fatalf("expected attrValue=%q, got %q", tt.attrValue, attrValue)
}
})
}
}
func TestEmptyFolderCleaner_ownsFolder(t *testing.T) {
// Create a LockRing with multiple servers
lockRing := lock_manager.NewLockRing(5 * time.Second)
@@ -512,7 +592,7 @@ func TestEmptyFolderCleaner_cacheEviction_skipsEntriesInQueue(t *testing.T) {
// Add a stale cache entry
cleaner.folderCounts[folder] = &folderState{roughCount: 0, lastCheck: oldTime}
// Also add to cleanup queue
cleaner.cleanupQueue.Add(folder, time.Now())
cleaner.cleanupQueue.Add(folder, "item", time.Now())
// Run eviction
cleaner.evictStaleCacheEntries()
@@ -558,7 +638,7 @@ func TestEmptyFolderCleaner_queueFIFOOrder(t *testing.T) {
// Verify time-sorted order by popping
for i, expected := range folders {
folder, ok := cleaner.cleanupQueue.Pop()
folder, _, ok := cleaner.cleanupQueue.Pop()
if !ok || folder != expected {
t.Errorf("expected folder %s at index %d, got %s", expected, i, folder)
}
@@ -566,3 +646,90 @@ func TestEmptyFolderCleaner_queueFIFOOrder(t *testing.T) {
cleaner.Stop()
}
func TestEmptyFolderCleaner_processCleanupQueue_drainsAllOnceTriggered(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
var deleted []string
mock := &mockFilerOps{
countFn: func(_ util.FullPath) (int, error) {
return 0, nil
},
deleteFn: func(path util.FullPath) error {
deleted = append(deleted, string(path))
return nil
},
}
cleaner := &EmptyFolderCleaner{
filer: mock,
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(2, time.Hour),
maxCountCheck: 1000,
cacheExpiry: time.Minute,
processorSleep: time.Second,
stopCh: make(chan struct{}),
}
now := time.Now()
cleaner.cleanupQueue.Add("/buckets/test/folder1", "i1", now)
cleaner.cleanupQueue.Add("/buckets/test/folder2", "i2", now.Add(time.Millisecond))
cleaner.cleanupQueue.Add("/buckets/test/folder3", "i3", now.Add(2*time.Millisecond))
cleaner.processCleanupQueue()
if got := cleaner.cleanupQueue.Len(); got != 0 {
t.Fatalf("expected queue to be drained, got len=%d", got)
}
if len(deleted) != 3 {
t.Fatalf("expected 3 deleted folders, got %d", len(deleted))
}
}
func TestEmptyFolderCleaner_executeCleanup_bucketPolicyDisabledSkips(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
var deleted []string
mock := &mockFilerOps{
countFn: func(_ util.FullPath) (int, error) {
return 0, nil
},
deleteFn: func(path util.FullPath) error {
deleted = append(deleted, string(path))
return nil
},
attrsFn: func(path util.FullPath) (map[string][]byte, error) {
if string(path) == "/buckets/test" {
return map[string][]byte{s3_constants.ExtAllowEmptyFolders: []byte("true")}, nil
}
return nil, nil
},
}
cleaner := &EmptyFolderCleaner{
filer: mock,
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, time.Minute),
maxCountCheck: 1000,
cacheExpiry: time.Minute,
processorSleep: time.Second,
stopCh: make(chan struct{}),
}
folder := "/buckets/test/folder"
cleaner.executeCleanup(folder, "triggered_item")
if len(deleted) != 0 {
t.Fatalf("expected folder %s to be skipped, got deletions %v", folder, deleted)
}
}

View File

@@ -9,7 +9,6 @@ import (
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
@@ -303,12 +302,8 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
GroupNames: entry.GroupNames,
},
}
// level > 3 corresponds to a path depth greater than "/buckets/<bucket_name>",
// ensuring we only mark subdirectories within a bucket as implicit.
if isUnderBuckets && level > 3 {
dirEntry.Extended = map[string][]byte{
s3_constants.ExtS3ImplicitDir: []byte("true"),
}
// Parent directories under buckets are created automatically; no additional logging.
}
glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode)