do delete expired entries on s3 list request (#7426)

* do delete expired entries on s3 list request
https://github.com/seaweedfs/seaweedfs/issues/6837

* disable delete expires s3 entry in filer

* pass opt allowDeleteObjectsByTTL to all servers

* delete on get and head

* add lifecycle expiration s3 tests

* fix opt allowDeleteObjectsByTTL for server

* fix test lifecycle expiration

* fix IsExpired

* fix locationPrefix for updateEntriesTTL

* fix s3tests

* resolve coderabbitai comments

* GetS3ExpireTime on filer

* go mod

* clear TtlSeconds for volume

* move s3 delete expired entry to filer

* filer delete meta and data

* delete unused func removeExpiredObject

* test s3 put

* test s3 put multipart

* allowDeleteObjectsByTTL by default

* fix pipeline tests

* rm duplicate SeaweedFSExpiresS3

* revert expiration tests

* fix updateTTL

* rm log

* resolve comment

* fix delete version object

* fix S3Versioning

* fix delete on FindEntry

* fix delete chunks

* fix sqlite not support concurrent writes/reads

* move deletion out of listing transaction; delete entries and empty folders

* Revert "fix sqlite not support concurrent writes/reads"

This reverts commit 5d5da14e0ed91c613fe5c0ed058f58bb04fba6f0.

* clearer handling on recursive empty directory deletion

* handle listing errors

* struct copying

* reuse code to delete empty folders

* use iterative approach with a queue to avoid recursive WithFilerClient calls

* to stop a gRPC stream from the client-side callback, return a specific error, e.g., io.EOF

* still issue UpdateEntry when the flag must be added

* errors join

* join path

* cleaner

* add context, sort directories by depth (deepest first) to avoid redundant checks

* batched operation, refactoring

* prevent deleting bucket

* constant

* reuse code

* more logging

* refactoring

* s3 TTL time

* Safety check

---------

Co-authored-by: chrislu <chris.lu@gmail.com>
This commit is contained in:
Konstantin Lebedev
2025-11-06 11:05:54 +05:00
committed by GitHub
parent cc444b1868
commit 084b377f87
18 changed files with 489 additions and 108 deletions

View File

@@ -55,8 +55,7 @@ func (s3a *S3ApiServer) createMultipartUpload(r *http.Request, input *s3.CreateM
if entry.Extended == nil {
entry.Extended = make(map[string][]byte)
}
entry.Extended["key"] = []byte(*input.Key)
entry.Extended[s3_constants.ExtMultipartObjectKey] = []byte(*input.Key)
// Set object owner for multipart upload
amzAccountId := r.Header.Get(s3_constants.AmzAccountId)
if amzAccountId != "" {
@@ -173,6 +172,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
deleteEntries := []*filer_pb.Entry{}
partEntries := make(map[int][]*filer_pb.Entry, len(entries))
entityTooSmall := false
entityWithTtl := false
for _, entry := range entries {
foundEntry := false
glog.V(4).Infof("completeMultipartUpload part entries %s", entry.Name)
@@ -212,6 +212,9 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
foundEntry = true
}
if foundEntry {
if !entityWithTtl && entry.Attributes != nil && entry.Attributes.TtlSec > 0 {
entityWithTtl = true
}
if len(completedPartNumbers) > 1 && partNumber != completedPartNumbers[len(completedPartNumbers)-1] &&
entry.Attributes.FileSize < multiPartMinSize {
glog.Warningf("completeMultipartUpload %s part file size less 5mb", entry.Name)
@@ -330,7 +333,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
}
for k, v := range pentry.Extended {
if k != "key" {
if k != s3_constants.ExtMultipartObjectKey {
versionEntry.Extended[k] = v
}
}
@@ -392,7 +395,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
}
for k, v := range pentry.Extended {
if k != "key" {
if k != s3_constants.ExtMultipartObjectKey {
entry.Extended[k] = v
}
}
@@ -445,7 +448,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
}
for k, v := range pentry.Extended {
if k != "key" {
if k != s3_constants.ExtMultipartObjectKey {
entry.Extended[k] = v
}
}
@@ -468,6 +471,10 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
entry.Attributes.Mime = mime
}
entry.Attributes.FileSize = uint64(offset)
// Set TTL-based S3 expiry (modification time)
if entityWithTtl {
entry.Extended[s3_constants.SeaweedFSExpiresS3] = []byte("true")
}
})
if err != nil {
@@ -587,7 +594,7 @@ func (s3a *S3ApiServer) listMultipartUploads(input *s3.ListMultipartUploadsInput
uploadsCount := int64(0)
for _, entry := range entries {
if entry.Extended != nil {
key := string(entry.Extended["key"])
key := string(entry.Extended[s3_constants.ExtMultipartObjectKey])
if *input.KeyMarker != "" && *input.KeyMarker != key {
continue
}

View File

@@ -2,11 +2,14 @@ package s3api
import (
"context"
"errors"
"fmt"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/util"
)
@@ -108,6 +111,110 @@ func (s3a *S3ApiServer) updateEntry(parentDirectoryPath string, newEntry *filer_
return err
}
// updateEntriesTTL walks the directory tree rooted at parentDirectoryPath and
// applies ttlSec plus the S3 expiry flag to every file entry it finds.
//
// The traversal is breadth-first and runs inside a single WithFilerClient
// call: directories discovered while listing are appended to a work queue
// instead of being handled through nested WithFilerClient calls.
//
// Failures do not stop the walk. Listing errors and per-entry update errors
// are collected, and the combined error (or nil when nothing failed) is
// returned once the queue is drained.
func (s3a *S3ApiServer) updateEntriesTTL(parentDirectoryPath string, ttlSec int32) error {
	return s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		var (
			ctx       = context.Background()
			collected []error
			pending   = []string{parentDirectoryPath}
		)

		// pending grows while we iterate: processDirectoryTTL appends every
		// subdirectory it encounters, so len(pending) is re-read each round.
		for next := 0; next < len(pending); next++ {
			listErr := s3a.processDirectoryTTL(ctx, client, pending[next], ttlSec, &pending, &collected)
			if listErr != nil {
				collected = append(collected, listErr)
			}
		}

		// errors.Join yields nil when there is nothing to report.
		return errors.Join(collected...)
	})
}
// processDirectoryTTL lists dir page by page and hands each page to
// processTTLBatch, which updates the files and queues the subdirectories.
//
// Each page holds at most filer.PaginationSize entries, and listing resumes
// after the last entry name of the previous page. A page shorter than the
// page size marks the end of the directory. Only a listing failure is
// returned here (wrapped with the directory name); per-entry update failures
// are reported through updateErrors.
func (s3a *S3ApiServer) processDirectoryTTL(ctx context.Context, client filer_pb.SeaweedFilerClient,
	dir string, ttlSec int32, dirsToProcess *[]string, updateErrors *[]error) error {

	const pageSize = filer.PaginationSize

	for cursor := ""; ; {
		lastName, seen, listErr := s3a.processTTLBatch(ctx, client, dir, ttlSec, cursor, pageSize, dirsToProcess, updateErrors)
		if listErr != nil {
			return fmt.Errorf("list entries in %s: %w", dir, listErr)
		}

		// A short page means there is nothing left to list.
		if seen < pageSize {
			return nil
		}
		cursor = lastName
	}
}
// processTTLBatch lists up to batchSize entries of dir, starting after
// startFrom (exclusive), and handles each one:
//   - directories are appended to dirsToProcess for a later pass;
//   - files get their TTL and S3 expiry flag updated via updateEntryTTL.
//
// An update failure is appended to updateErrors and does not abort the
// listing. The function returns the name of the last entry seen, the number
// of entries seen, and the error reported by the listing call itself.
func (s3a *S3ApiServer) processTTLBatch(ctx context.Context, client filer_pb.SeaweedFilerClient,
	dir string, ttlSec int32, startFrom string, batchSize uint32,
	dirsToProcess *[]string, updateErrors *[]error) (lastEntry string, count int, err error) {

	visit := func(item *filer_pb.Entry, _ bool) error {
		count++
		lastEntry = item.Name

		if !item.IsDirectory {
			// Record the failure but keep listing the remaining entries.
			if updateErr := s3a.updateEntryTTL(ctx, client, dir, item, ttlSec); updateErr != nil {
				*updateErrors = append(*updateErrors, updateErr)
			}
			return nil
		}

		// Defer subdirectories to the caller's work queue.
		*dirsToProcess = append(*dirsToProcess, string(util.NewFullPath(dir, item.Name)))
		return nil
	}

	err = filer_pb.SeaweedList(ctx, client, dir, "", visit, startFrom, false, batchSize)
	return lastEntry, count, err
}
// updateEntryTTL makes sure entry carries the given ttlSec and the
// SeaweedFSExpiresS3 flag, and writes it back to the filer when either one
// had to change.
//
// Missing Attributes or Extended containers are created on the entry first.
// When the TTL already matches and the flag is already "true", no request is
// sent. A failed update is returned wrapped with the entry's path.
func (s3a *S3ApiServer) updateEntryTTL(ctx context.Context, client filer_pb.SeaweedFilerClient,
	dir string, entry *filer_pb.Entry, ttlSec int32) error {

	if entry.Extended == nil {
		entry.Extended = make(map[string][]byte)
	}
	if entry.Attributes == nil {
		entry.Attributes = &filer_pb.FuseAttributes{}
	}

	// Skip the write only when both the TTL and the flag are already correct;
	// an entry with the right TTL but no flag must still be updated.
	ttlMatches := entry.Attributes.TtlSec == ttlSec
	flagged := string(entry.Extended[s3_constants.SeaweedFSExpiresS3]) == "true"
	if ttlMatches && flagged {
		return nil
	}

	entry.Extended[s3_constants.SeaweedFSExpiresS3] = []byte("true")
	entry.Attributes.TtlSec = ttlSec

	request := &filer_pb.UpdateEntryRequest{
		Directory: dir,
		Entry:     entry,
	}
	err := filer_pb.UpdateEntry(ctx, client, request)
	if err == nil {
		return nil
	}
	return fmt.Errorf("file %s/%s: %w", dir, entry.Name, err)
}
func (s3a *S3ApiServer) getCollectionName(bucket string) string {
if s3a.option.FilerGroup != "" {
return fmt.Sprintf("%s_%s", s3a.option.FilerGroup, bucket)

View File

@@ -11,6 +11,7 @@ const (
ExtETagKey = "Seaweed-X-Amz-ETag"
ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id"
ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name"
ExtMultipartObjectKey = "key"
// Bucket Policy
ExtBucketPolicyKey = "Seaweed-X-Amz-Bucket-Policy"

View File

@@ -42,6 +42,7 @@ const (
SeaweedFSIsDirectoryKey = "X-Seaweedfs-Is-Directory-Key"
SeaweedFSPartNumber = "X-Seaweedfs-Part-Number"
SeaweedFSUploadId = "X-Seaweedfs-Upload-Id"
SeaweedFSExpiresS3 = "X-Seaweedfs-Expires-S3"
// S3 ACL headers
AmzCannedAcl = "X-Amz-Acl"

View File

@@ -7,6 +7,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/util"
"math"
"net/http"
"path"
@@ -792,9 +793,9 @@ func (s3a *S3ApiServer) PutBucketLifecycleConfigurationHandler(w http.ResponseWr
if rule.Expiration.Days == 0 {
continue
}
locationPrefix := fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, rulePrefix)
locConf := &filer_pb.FilerConf_PathConf{
LocationPrefix: fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, rulePrefix),
LocationPrefix: locationPrefix,
Collection: collectionName,
Ttl: fmt.Sprintf("%dd", rule.Expiration.Days),
}
@@ -806,6 +807,13 @@ func (s3a *S3ApiServer) PutBucketLifecycleConfigurationHandler(w http.ResponseWr
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
ttlSec := int32((time.Duration(rule.Expiration.Days) * util.LifeCycleInterval).Seconds())
glog.V(2).Infof("Start updating TTL for %s", locationPrefix)
if updErr := s3a.updateEntriesTTL(locationPrefix, ttlSec); updErr != nil {
glog.Errorf("PutBucketLifecycleConfigurationHandler update TTL for %s: %s", locationPrefix, updErr)
} else {
glog.V(2).Infof("Finished updating TTL for %s", locationPrefix)
}
changed = true
}

View File

@@ -375,7 +375,6 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request)
// Restore the original Range header for SSE processing
if sseObject && originalRangeHeader != "" {
r.Header.Set("Range", originalRangeHeader)
}
// Add SSE metadata headers based on object metadata before SSE processing
@@ -603,7 +602,6 @@ func (s3a *S3ApiServer) proxyToFiler(w http.ResponseWriter, r *http.Request, des
resp.Body.Close()
return
}
setUserMetadataKeyToLowercase(resp)
responseStatusCode, bytesTransferred := responseFn(resp, w)

View File

@@ -1,6 +1,7 @@
package s3api
import (
"context"
"encoding/xml"
"fmt"
"io"
@@ -8,14 +9,11 @@ import (
"slices"
"strings"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
stats_collect "github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/util"
)
@@ -129,22 +127,19 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque
dir, name := target.DirAndName()
err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Use operation context that won't be cancelled if request terminates
// This ensures deletion completes atomically to avoid inconsistent state
opCtx := context.WithoutCancel(r.Context())
if err := doDeleteEntry(client, dir, name, true, false); err != nil {
return err
}
if s3a.option.AllowEmptyFolder {
return nil
}
directoriesWithDeletion := make(map[string]int)
if strings.LastIndex(object, "/") > 0 {
directoriesWithDeletion[dir]++
// purge empty folders, only checking folders with deletions
for len(directoriesWithDeletion) > 0 {
directoriesWithDeletion = s3a.doDeleteEmptyDirectories(client, directoriesWithDeletion)
}
// Cleanup empty directories
if !s3a.option.AllowEmptyFolder && strings.LastIndex(object, "/") > 0 {
bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
// Recursively delete empty parent directories, stop at bucket path
filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dir), util.FullPath(bucketPath), nil)
}
return nil
@@ -227,7 +222,7 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
var deleteErrors []DeleteError
var auditLog *s3err.AccessLog
directoriesWithDeletion := make(map[string]int)
directoriesWithDeletion := make(map[string]bool)
if s3err.Logger != nil {
auditLog = s3err.GetAccessLog(r, http.StatusNoContent, s3err.ErrNone)
@@ -250,6 +245,9 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
versioningConfigured := (versioningState != "")
s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Use operation context that won't be cancelled if request terminates
// This ensures batch deletion completes atomically to avoid inconsistent state
opCtx := context.WithoutCancel(r.Context())
// delete file entries
for _, object := range deleteObjects.Objects {
@@ -359,12 +357,14 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err == nil {
directoriesWithDeletion[parentDirectoryPath]++
// Track directory for empty directory cleanup
if !s3a.option.AllowEmptyFolder {
directoriesWithDeletion[parentDirectoryPath] = true
}
deletedObjects = append(deletedObjects, object)
} else if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) {
deletedObjects = append(deletedObjects, object)
} else {
delete(directoriesWithDeletion, parentDirectoryPath)
deleteErrors = append(deleteErrors, DeleteError{
Code: "",
Message: err.Error(),
@@ -380,13 +380,29 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
}
}
if s3a.option.AllowEmptyFolder {
return nil
}
// Cleanup empty directories - optimize by processing deepest first
if !s3a.option.AllowEmptyFolder && len(directoriesWithDeletion) > 0 {
bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
// purge empty folders, only checking folders with deletions
for len(directoriesWithDeletion) > 0 {
directoriesWithDeletion = s3a.doDeleteEmptyDirectories(client, directoriesWithDeletion)
// Collect and sort directories by depth (deepest first) to avoid redundant checks
var allDirs []string
for dirPath := range directoriesWithDeletion {
allDirs = append(allDirs, dirPath)
}
// Sort by depth (deeper directories first)
slices.SortFunc(allDirs, func(a, b string) int {
return strings.Count(b, "/") - strings.Count(a, "/")
})
// Track already-checked directories to avoid redundant work
checked := make(map[string]bool)
for _, dirPath := range allDirs {
if !checked[dirPath] {
// Recursively delete empty parent directories, stop at bucket path
// Mark this directory and all its parents as checked during recursion
filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dirPath), util.FullPath(bucketPath), checked)
}
}
}
return nil
@@ -403,26 +419,3 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
writeSuccessResponseXML(w, r, deleteResp)
}
// doDeleteEmptyDirectories attempts to remove every directory named in
// directoriesWithDeletion, trying longer paths before shorter ones.
//
// Directories located directly under s3a.option.BucketsPath are never
// removed. Each delete is issued with both boolean arguments of
// doDeleteEntry set to false; a failure is logged at verbosity 4 as the
// directory still not being empty.
//
// The returned map counts, per parent directory, how many of its child
// directories were removed in this pass, so the caller can feed it back in
// to continue the cleanup one level up.
func (s3a *S3ApiServer) doDeleteEmptyDirectories(client filer_pb.SeaweedFilerClient, directoriesWithDeletion map[string]int) (newDirectoriesWithDeletion map[string]int) {
	candidates := make([]string, 0, len(directoriesWithDeletion))
	for dirPath := range directoriesWithDeletion {
		candidates = append(candidates, dirPath)
	}

	// Longest paths first.
	slices.SortFunc(candidates, func(a, b string) int {
		return len(b) - len(a)
	})

	newDirectoriesWithDeletion = make(map[string]int)
	for _, dirPath := range candidates {
		parent, name := util.FullPath(dirPath).DirAndName()
		if parent == s3a.option.BucketsPath {
			continue
		}

		err := doDeleteEntry(client, parent, name, false, false)
		if err == nil {
			newDirectoriesWithDeletion[parent]++
			continue
		}
		glog.V(4).Infof("directory %s has %d deletion but still not empty: %v", dirPath, directoriesWithDeletion[dirPath], err)
	}
	return newDirectoriesWithDeletion
}

View File

@@ -333,7 +333,8 @@ func (s3a *S3ApiServer) putToFiler(r *http.Request, uploadUrl string, dataReader
proxyReq.Header.Set(s3_constants.SeaweedFSSSES3Key, base64.StdEncoding.EncodeToString(sseS3Metadata))
glog.V(3).Infof("putToFiler: storing SSE-S3 metadata for object %s with keyID %s", uploadUrl, sseS3Key.KeyID)
}
// Set TTL-based S3 expiry (modification time)
proxyReq.Header.Set(s3_constants.SeaweedFSExpiresS3, "true")
// ensure that the Authorization header is overriding any previous
// Authorization header which might be already present in proxyReq
s3a.maybeAddFilerJwtAuthorization(proxyReq, true)