filer: auto clean empty implicit s3 folders (#8051)
* filer: auto clean empty s3 implicit folders

  Explicitly tag implicitly created S3 folders (parent directories from object uploads) with 'Seaweed-X-Amz-Implicit-Dir'. Update EmptyFolderCleaner to check for this attribute and cache the result efficiently.
* filer: correctly handle nil attributes in empty folder cleaner cache
* filer: refine implicit tagging logic

  Prevent tagging buckets as implicit directories. Reduce code duplication.
* filer: safeguard GetEntryAttributes against nil entry and not found error
* filer: move ErrNotFound handling to EmptyFolderCleaner
* filer: add comment to explain level > 3 check for implicit directories
This commit is contained in:
@@ -9,6 +9,8 @@ import (
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||
)
|
||||
|
||||
@@ -24,11 +26,13 @@ const (
|
||||
// FilerOperations declares the filer calls this package depends on.
// NOTE(review): presumably the type behind EmptyFolderCleaner's filer field — confirm.
type FilerOperations interface {
|
||||
// CountDirectoryEntries returns a count of entries under dirPath.
// NOTE(review): limit presumably caps how many entries are counted — confirm against the implementation.
CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
|
||||
// DeleteEntryMetaAndData deletes the entry at p; the flags and signatures are
// passed through to the implementation, which is not visible here.
DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
|
||||
// GetEntryAttributes returns the attribute map of the entry at p.
// The map may be nil, so callers should nil-check before reading from it.
GetEntryAttributes(ctx context.Context, p util.FullPath) (attributes map[string][]byte, err error)
|
||||
}
|
||||
|
||||
// folderState tracks the state of a folder for empty folder cleanup
|
||||
type folderState struct {
|
||||
roughCount int // Cached rough count (up to maxCountCheck)
|
||||
isImplicit *bool // Tri-state boolean: nil (unknown), true (implicit), false (explicit)
|
||||
lastAddTime time.Time // Last time an item was added
|
||||
lastDelTime time.Time // Last time an item was deleted
|
||||
lastCheck time.Time // Last time we checked the actual count
|
||||
@@ -265,8 +269,47 @@ func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if folder is actually empty (count up to maxCountCheck)
|
||||
// Check for explicit implicit_dir attribute
|
||||
// First check cache
|
||||
ctx := context.Background()
|
||||
efc.mu.RLock()
|
||||
var cachedImplicit *bool
|
||||
if state, exists := efc.folderCounts[folder]; exists {
|
||||
cachedImplicit = state.isImplicit
|
||||
}
|
||||
efc.mu.RUnlock()
|
||||
|
||||
var isImplicit bool
|
||||
if cachedImplicit != nil {
|
||||
isImplicit = *cachedImplicit
|
||||
} else {
|
||||
// Not cached, check filer
|
||||
attrs, err := efc.filer.GetEntryAttributes(ctx, util.FullPath(folder))
|
||||
if err != nil {
|
||||
if err == filer_pb.ErrNotFound {
|
||||
return
|
||||
}
|
||||
glog.V(2).Infof("EmptyFolderCleaner: error getting attributes for %s: %v", folder, err)
|
||||
return
|
||||
}
|
||||
|
||||
isImplicit = attrs != nil && string(attrs[s3_constants.ExtS3ImplicitDir]) == "true"
|
||||
|
||||
// Update cache
|
||||
efc.mu.Lock()
|
||||
if _, exists := efc.folderCounts[folder]; !exists {
|
||||
efc.folderCounts[folder] = &folderState{}
|
||||
}
|
||||
efc.folderCounts[folder].isImplicit = &isImplicit
|
||||
efc.mu.Unlock()
|
||||
}
|
||||
|
||||
if !isImplicit {
|
||||
glog.V(4).Infof("EmptyFolderCleaner: folder %s is not marked as implicit, skipping", folder)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if folder is actually empty (count up to maxCountCheck)
|
||||
count, err := efc.countItems(ctx, folder)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err)
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
||||
@@ -273,7 +274,8 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
|
||||
|
||||
// fmt.Printf("dirParts: %v %v %v\n", dirParts[0], dirParts[1], dirParts[2])
|
||||
// dirParts[0] == "" and dirParts[1] == "buckets"
|
||||
if len(dirParts) >= 3 && dirParts[1] == "buckets" {
|
||||
isUnderBuckets := len(dirParts) >= 3 && dirParts[1] == "buckets"
|
||||
if isUnderBuckets {
|
||||
if err := s3bucket.VerifyS3BucketName(dirParts[2]); err != nil {
|
||||
return fmt.Errorf("invalid bucket name %s: %v", dirParts[2], err)
|
||||
}
|
||||
@@ -299,6 +301,13 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
|
||||
GroupNames: entry.GroupNames,
|
||||
},
|
||||
}
|
||||
// level > 3 corresponds to a path depth greater than "/buckets/<bucket_name>",
|
||||
// ensuring we only mark subdirectories within a bucket as implicit.
|
||||
if isUnderBuckets && level > 3 {
|
||||
dirEntry.Extended = map[string][]byte{
|
||||
s3_constants.ExtS3ImplicitDir: []byte("true"),
|
||||
}
|
||||
}
|
||||
|
||||
glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode)
|
||||
mkdirErr := f.Store.InsertEntry(ctx, dirEntry)
|
||||
@@ -521,3 +530,14 @@ func (f *Filer) Shutdown() {
|
||||
f.LocalMetaLogBuffer.ShutdownLogBuffer()
|
||||
f.Store.Shutdown()
|
||||
}
|
||||
|
||||
// GetEntryAttributes looks up the entry at p and returns its Extended
// attribute map. An error from FindEntry is returned unchanged, and a nil
// entry without an error yields (nil, nil).
func (f *Filer) GetEntryAttributes(ctx context.Context, p util.FullPath) (map[string][]byte, error) {
|
||||
entry, err := f.FindEntry(ctx, p)
|
||||
if err != nil {
|
||||
// Propagate lookup errors as-is so the caller can inspect them.
return nil, err
|
||||
}
|
||||
// Guard against a nil entry returned without an error.
if entry == nil {
|
||||
return nil, nil
|
||||
}
|
||||
// Extended may itself be nil when the entry carries no attributes.
return entry.Extended, nil
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ const (
|
||||
ExtETagKey = "Seaweed-X-Amz-ETag"
|
||||
ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id"
|
||||
ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name"
|
||||
ExtS3ImplicitDir = "Seaweed-X-Amz-Implicit-Dir"
|
||||
// Cached list metadata in .versions directory for single-scan efficiency
|
||||
ExtLatestVersionSizeKey = "Seaweed-X-Amz-Latest-Version-Size"
|
||||
ExtLatestVersionETagKey = "Seaweed-X-Amz-Latest-Version-ETag"
|
||||
|
||||
Reference in New Issue
Block a user