do delete expired entries on s3 list request (#7426)
* do delete expired entries on s3 list request https://github.com/seaweedfs/seaweedfs/issues/6837 * disable delete expires s3 entry in filer * pass opt allowDeleteObjectsByTTL to all servers * delete on get and head * add lifecycle expiration s3 tests * fix opt allowDeleteObjectsByTTL for server * fix test lifecycle expiration * fix IsExpired * fix locationPrefix for updateEntriesTTL * fix s3tests * resolv coderabbitai * GetS3ExpireTime on filer * go mod * clear TtlSeconds for volume * move s3 delete expired entry to filer * filer delete meta and data * del unusing func removeExpiredObject * test s3 put * test s3 put multipart * allowDeleteObjectsByTTL by default * fix pipline tests * rm dublicate SeaweedFSExpiresS3 * revert expiration tests * fix updateTTL * rm log * resolv comment * fix delete version object * fix S3Versioning * fix delete on FindEntry * fix delete chunks * fix sqlite not support concurrent writes/reads * move deletion out of listing transaction; delete entries and empty folders * Revert "fix sqlite not support concurrent writes/reads" This reverts commit 5d5da14e0ed91c613fe5c0ed058f58bb04fba6f0. * clearer handling on recursive empty directory deletion * handle listing errors * strut copying * reuse code to delete empty folders * use iterative approach with a queue to avoid recursive WithFilerClient calls * stop a gRPC stream from the client-side callback is to return a specific error, e.g., io.EOF * still issue UpdateEntry when the flag must be added * errors join * join path * cleaner * add context, sort directories by depth (deepest first) to avoid redundant checks * batched operation, refactoring * prevent deleting bucket * constant * reuse code * more logging * refactoring * s3 TTL time * Safety check --------- Co-authored-by: chrislu <chris.lu@gmail.com>
This commit is contained in:
committed by
GitHub
parent
cc444b1868
commit
084b377f87
@@ -2,11 +2,14 @@ package s3api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/filer"
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||
)
|
||||
|
||||
@@ -108,6 +111,110 @@ func (s3a *S3ApiServer) updateEntry(parentDirectoryPath string, newEntry *filer_
|
||||
return err
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) updateEntriesTTL(parentDirectoryPath string, ttlSec int32) error {
|
||||
// Use iterative approach with a queue to avoid recursive WithFilerClient calls
|
||||
// which would create a new connection for each subdirectory
|
||||
return s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
||||
ctx := context.Background()
|
||||
var updateErrors []error
|
||||
dirsToProcess := []string{parentDirectoryPath}
|
||||
|
||||
for len(dirsToProcess) > 0 {
|
||||
dir := dirsToProcess[0]
|
||||
dirsToProcess = dirsToProcess[1:]
|
||||
|
||||
// Process directory in paginated batches
|
||||
if err := s3a.processDirectoryTTL(ctx, client, dir, ttlSec, &dirsToProcess, &updateErrors); err != nil {
|
||||
updateErrors = append(updateErrors, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(updateErrors) > 0 {
|
||||
return errors.Join(updateErrors...)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// processDirectoryTTL processes a single directory in paginated batches
|
||||
func (s3a *S3ApiServer) processDirectoryTTL(ctx context.Context, client filer_pb.SeaweedFilerClient,
|
||||
dir string, ttlSec int32, dirsToProcess *[]string, updateErrors *[]error) error {
|
||||
|
||||
const batchSize = filer.PaginationSize
|
||||
startFrom := ""
|
||||
|
||||
for {
|
||||
lastEntryName, entryCount, err := s3a.processTTLBatch(ctx, client, dir, ttlSec, startFrom, batchSize, dirsToProcess, updateErrors)
|
||||
if err != nil {
|
||||
return fmt.Errorf("list entries in %s: %w", dir, err)
|
||||
}
|
||||
|
||||
// If we got fewer entries than batch size, we've reached the end
|
||||
if entryCount < batchSize {
|
||||
break
|
||||
}
|
||||
startFrom = lastEntryName
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// processTTLBatch processes a single batch of entries
|
||||
func (s3a *S3ApiServer) processTTLBatch(ctx context.Context, client filer_pb.SeaweedFilerClient,
|
||||
dir string, ttlSec int32, startFrom string, batchSize uint32,
|
||||
dirsToProcess *[]string, updateErrors *[]error) (lastEntry string, count int, err error) {
|
||||
|
||||
err = filer_pb.SeaweedList(ctx, client, dir, "", func(entry *filer_pb.Entry, isLast bool) error {
|
||||
lastEntry = entry.Name
|
||||
count++
|
||||
|
||||
if entry.IsDirectory {
|
||||
*dirsToProcess = append(*dirsToProcess, string(util.NewFullPath(dir, entry.Name)))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update entry TTL and S3 expiry flag
|
||||
if updateErr := s3a.updateEntryTTL(ctx, client, dir, entry, ttlSec); updateErr != nil {
|
||||
*updateErrors = append(*updateErrors, updateErr)
|
||||
}
|
||||
return nil
|
||||
}, startFrom, false, batchSize)
|
||||
|
||||
return lastEntry, count, err
|
||||
}
|
||||
|
||||
// updateEntryTTL updates a single entry's TTL and S3 expiry flag
|
||||
func (s3a *S3ApiServer) updateEntryTTL(ctx context.Context, client filer_pb.SeaweedFilerClient,
|
||||
dir string, entry *filer_pb.Entry, ttlSec int32) error {
|
||||
|
||||
if entry.Attributes == nil {
|
||||
entry.Attributes = &filer_pb.FuseAttributes{}
|
||||
}
|
||||
if entry.Extended == nil {
|
||||
entry.Extended = make(map[string][]byte)
|
||||
}
|
||||
|
||||
// Check if both TTL and S3 expiry flag are already set correctly
|
||||
flagAlreadySet := string(entry.Extended[s3_constants.SeaweedFSExpiresS3]) == "true"
|
||||
if entry.Attributes.TtlSec == ttlSec && flagAlreadySet {
|
||||
return nil // Already up to date
|
||||
}
|
||||
|
||||
// Set the S3 expiry flag
|
||||
entry.Extended[s3_constants.SeaweedFSExpiresS3] = []byte("true")
|
||||
// Update TTL if needed
|
||||
if entry.Attributes.TtlSec != ttlSec {
|
||||
entry.Attributes.TtlSec = ttlSec
|
||||
}
|
||||
|
||||
if err := filer_pb.UpdateEntry(ctx, client, &filer_pb.UpdateEntryRequest{
|
||||
Directory: dir,
|
||||
Entry: entry,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("file %s/%s: %w", dir, entry.Name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) getCollectionName(bucket string) string {
|
||||
if s3a.option.FilerGroup != "" {
|
||||
return fmt.Sprintf("%s_%s", s3a.option.FilerGroup, bucket)
|
||||
|
||||
Reference in New Issue
Block a user