filer: auto clean empty implicit s3 folders (#8051)
* filer: auto clean empty s3 implicit folders
  Explicitly tag implicitly created S3 folders (parent directories from object uploads) with 'Seaweed-X-Amz-Implicit-Dir'. Update EmptyFolderCleaner to check for this attribute and cache the result efficiently.
* filer: correctly handle nil attributes in empty folder cleaner cache
* filer: refine implicit tagging logic
  Prevent tagging buckets as implicit directories. Reduce code duplication.
* filer: safeguard GetEntryAttributes against nil entry and not found error
* filer: move ErrNotFound handling to EmptyFolderCleaner
* filer: add comment to explain level > 3 check for implicit directories
This commit is contained in:
@@ -9,6 +9,8 @@ import (
|
|||||||
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -24,11 +26,13 @@ const (
|
|||||||
type FilerOperations interface {
|
type FilerOperations interface {
|
||||||
CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
|
CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
|
||||||
DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
|
DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
|
||||||
|
GetEntryAttributes(ctx context.Context, p util.FullPath) (attributes map[string][]byte, err error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// folderState tracks the state of a folder for empty folder cleanup
|
// folderState tracks the state of a folder for empty folder cleanup
|
||||||
type folderState struct {
|
type folderState struct {
|
||||||
roughCount int // Cached rough count (up to maxCountCheck)
|
roughCount int // Cached rough count (up to maxCountCheck)
|
||||||
|
isImplicit *bool // Tri-state boolean: nil (unknown), true (implicit), false (explicit)
|
||||||
lastAddTime time.Time // Last time an item was added
|
lastAddTime time.Time // Last time an item was added
|
||||||
lastDelTime time.Time // Last time an item was deleted
|
lastDelTime time.Time // Last time an item was deleted
|
||||||
lastCheck time.Time // Last time we checked the actual count
|
lastCheck time.Time // Last time we checked the actual count
|
||||||
@@ -265,8 +269,47 @@ func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if folder is actually empty (count up to maxCountCheck)
|
// Only folders tagged with the implicit-directory attribute (s3_constants.ExtS3ImplicitDir) are eligible for cleanup
|
||||||
|
// First check cache
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
efc.mu.RLock()
|
||||||
|
var cachedImplicit *bool
|
||||||
|
if state, exists := efc.folderCounts[folder]; exists {
|
||||||
|
cachedImplicit = state.isImplicit
|
||||||
|
}
|
||||||
|
efc.mu.RUnlock()
|
||||||
|
|
||||||
|
var isImplicit bool
|
||||||
|
if cachedImplicit != nil {
|
||||||
|
isImplicit = *cachedImplicit
|
||||||
|
} else {
|
||||||
|
// Not cached, check filer
|
||||||
|
attrs, err := efc.filer.GetEntryAttributes(ctx, util.FullPath(folder))
|
||||||
|
if err != nil {
|
||||||
|
if err == filer_pb.ErrNotFound {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
glog.V(2).Infof("EmptyFolderCleaner: error getting attributes for %s: %v", folder, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
isImplicit = attrs != nil && string(attrs[s3_constants.ExtS3ImplicitDir]) == "true"
|
||||||
|
|
||||||
|
// Update cache
|
||||||
|
efc.mu.Lock()
|
||||||
|
if _, exists := efc.folderCounts[folder]; !exists {
|
||||||
|
efc.folderCounts[folder] = &folderState{}
|
||||||
|
}
|
||||||
|
efc.folderCounts[folder].isImplicit = &isImplicit
|
||||||
|
efc.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isImplicit {
|
||||||
|
glog.V(4).Infof("EmptyFolderCleaner: folder %s is not marked as implicit, skipping", folder)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if folder is actually empty (count up to maxCountCheck)
|
||||||
count, err := efc.countItems(ctx, folder)
|
count, err := efc.countItems(ctx, folder)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err)
|
glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
|
||||||
|
|
||||||
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
||||||
@@ -273,7 +274,8 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
|
|||||||
|
|
||||||
// fmt.Printf("dirParts: %v %v %v\n", dirParts[0], dirParts[1], dirParts[2])
|
// fmt.Printf("dirParts: %v %v %v\n", dirParts[0], dirParts[1], dirParts[2])
|
||||||
// dirParts[0] == "" and dirParts[1] == "buckets"
|
// dirParts[0] == "" and dirParts[1] == "buckets"
|
||||||
if len(dirParts) >= 3 && dirParts[1] == "buckets" {
|
isUnderBuckets := len(dirParts) >= 3 && dirParts[1] == "buckets"
|
||||||
|
if isUnderBuckets {
|
||||||
if err := s3bucket.VerifyS3BucketName(dirParts[2]); err != nil {
|
if err := s3bucket.VerifyS3BucketName(dirParts[2]); err != nil {
|
||||||
return fmt.Errorf("invalid bucket name %s: %v", dirParts[2], err)
|
return fmt.Errorf("invalid bucket name %s: %v", dirParts[2], err)
|
||||||
}
|
}
|
||||||
@@ -299,6 +301,13 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
|
|||||||
GroupNames: entry.GroupNames,
|
GroupNames: entry.GroupNames,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
// level > 3 corresponds to a path depth greater than "/buckets/<bucket_name>",
|
||||||
|
// ensuring we only mark subdirectories within a bucket as implicit.
|
||||||
|
if isUnderBuckets && level > 3 {
|
||||||
|
dirEntry.Extended = map[string][]byte{
|
||||||
|
s3_constants.ExtS3ImplicitDir: []byte("true"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode)
|
glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode)
|
||||||
mkdirErr := f.Store.InsertEntry(ctx, dirEntry)
|
mkdirErr := f.Store.InsertEntry(ctx, dirEntry)
|
||||||
@@ -521,3 +530,14 @@ func (f *Filer) Shutdown() {
|
|||||||
f.LocalMetaLogBuffer.ShutdownLogBuffer()
|
f.LocalMetaLogBuffer.ShutdownLogBuffer()
|
||||||
f.Store.Shutdown()
|
f.Store.Shutdown()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetEntryAttributes looks up the entry at path p via FindEntry and returns
// its Extended attribute map.
//
// Any error from FindEntry is returned unchanged together with nil attributes,
// so callers are responsible for interpreting it. If FindEntry reports no
// error but yields a nil entry, the result is (nil, nil). The returned map may
// itself be nil when the entry carries no extended attributes.
func (f *Filer) GetEntryAttributes(ctx context.Context, p util.FullPath) (map[string][]byte, error) {
|
||||||
|
entry, err := f.FindEntry(ctx, p)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if entry == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return entry.Extended, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ const (
|
|||||||
ExtETagKey = "Seaweed-X-Amz-ETag"
|
ExtETagKey = "Seaweed-X-Amz-ETag"
|
||||||
ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id"
|
ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id"
|
||||||
ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name"
|
ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name"
|
||||||
|
ExtS3ImplicitDir = "Seaweed-X-Amz-Implicit-Dir"
|
||||||
// Cached list metadata in .versions directory for single-scan efficiency
|
// Cached list metadata in .versions directory for single-scan efficiency
|
||||||
ExtLatestVersionSizeKey = "Seaweed-X-Amz-Latest-Version-Size"
|
ExtLatestVersionSizeKey = "Seaweed-X-Amz-Latest-Version-Size"
|
||||||
ExtLatestVersionETagKey = "Seaweed-X-Amz-Latest-Version-ETag"
|
ExtLatestVersionETagKey = "Seaweed-X-Amz-Latest-Version-ETag"
|
||||||
|
|||||||
Reference in New Issue
Block a user