lifecycle worker: detect buckets via lifecycle XML metadata (#8808)
* s3api: extend lifecycle XML types with NoncurrentVersionExpiration, AbortIncompleteMultipartUpload

  Add missing S3 lifecycle rule types to the XML data model:
  - NoncurrentVersionExpiration with NoncurrentDays and NewerNoncurrentVersions
  - NoncurrentVersionTransition with NoncurrentDays and StorageClass
  - AbortIncompleteMultipartUpload with DaysAfterInitiation
  - Filter.ObjectSizeGreaterThan and ObjectSizeLessThan
  - And.ObjectSizeGreaterThan and ObjectSizeLessThan
  - Filter.UnmarshalXML to properly parse Tag, And, and size filter elements

  Each new type follows the existing set-field pattern for conditional XML marshaling. No behavior changes - these types are not yet wired into handlers or the lifecycle worker.

* s3lifecycle: add lifecycle rule evaluator package

  New package weed/s3api/s3lifecycle/ provides a pure-function lifecycle rule evaluation engine. The evaluator accepts flattened Rule structs and ObjectInfo metadata, and returns the appropriate Action.

  Components:
  - evaluator.go: Evaluate() for per-object actions with S3 priority ordering (delete marker > noncurrent version > current expiration), ShouldExpireNoncurrentVersion() with NewerNoncurrentVersions support, EvaluateMPUAbort() for multipart upload rules
  - filter.go: prefix, tag, and size-based filter matching
  - tags.go: ExtractTags() extracts S3 tags from filer Extended metadata, HasTagRules() for scan-time optimization
  - version_time.go: GetVersionTimestamp() extracts timestamps from SeaweedFS version IDs (both old and new format)

  Comprehensive test coverage: 54 tests covering all action types, filter combinations, edge cases, and version ID formats.

* s3api: add UnmarshalXML for Expiration, Transition, ExpireDeleteMarker

  Add UnmarshalXML methods that set the internal 'set' flag during XML parsing. Previously these flags were only set programmatically, causing XML round-trip to drop elements. This ensures lifecycle configurations stored as XML survive unmarshal/marshal cycles correctly.

  Add comprehensive XML round-trip tests for all lifecycle rule types including NoncurrentVersionExpiration, AbortIncompleteMultipartUpload, Filter with Tag/And/size constraints, and a complete Terraform-style lifecycle configuration.

* s3lifecycle: address review feedback

  - Fix version_time.go overflow: guard timestampPart > MaxInt64 before the inversion subtraction to prevent uint64 wrap
  - Make all expiry checks inclusive (!now.Before instead of now.After) so actions trigger at the exact scheduled instant
  - Add NoncurrentIndex to ObjectInfo so Evaluate() can properly handle NewerNoncurrentVersions via ShouldExpireNoncurrentVersion()
  - Add test for high-bit overflow version ID

* s3lifecycle: guard ShouldExpireNoncurrentVersion against zero SuccessorModTime

  Add early return when obj.IsLatest or obj.SuccessorModTime.IsZero() to prevent premature expiration of versions with uninitialized successor timestamps (zero value would compute to epoch, always expired).

* lifecycle worker: detect buckets with lifecycle XML, not just filer.conf TTLs

  Update the detection phase to check for stored lifecycle XML in bucket metadata (key: s3-bucket-lifecycle-configuration-xml) in addition to filer.conf TTL entries. A bucket is proposed for lifecycle processing if it has lifecycle XML OR filer.conf TTLs (backward compatible).

  New proposal parameters:
  - has_lifecycle_xml: whether the bucket has stored lifecycle XML
  - versioning_status: the bucket's versioning state (Enabled/Suspended/"")

  These parameters will be used by the execution phase (subsequent PR) to determine which evaluation path to use.

* lifecycle worker: update detection function comment to reflect XML support

---------

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -10,11 +10,15 @@ import (
|
|||||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
|
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// lifecycleXMLKey is the key in a bucket entry's Extended metadata under
// which the bucket's stored S3 lifecycle configuration XML is looked up.
const lifecycleXMLKey = "s3-bucket-lifecycle-configuration-xml"
|
||||||
|
|
||||||
// detectBucketsWithLifecycleRules scans all S3 buckets to find those
|
// detectBucketsWithLifecycleRules scans all S3 buckets to find those
|
||||||
// with lifecycle (TTL) rules configured in filer.conf.
|
// with lifecycle rules, either TTL entries in filer.conf or lifecycle
|
||||||
|
// XML stored in bucket metadata.
|
||||||
func (h *Handler) detectBucketsWithLifecycleRules(
|
func (h *Handler) detectBucketsWithLifecycleRules(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filerClient filer_pb.SeaweedFilerClient,
|
filerClient filer_pb.SeaweedFilerClient,
|
||||||
@@ -53,25 +57,38 @@ func (h *Handler) detectBucketsWithLifecycleRules(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Derive the collection name for this bucket.
|
// Check for lifecycle rules from two sources:
|
||||||
|
// 1. filer.conf TTLs (legacy Expiration.Days fast path)
|
||||||
|
// 2. Stored lifecycle XML in bucket metadata (full rule support)
|
||||||
collection := bucketName
|
collection := bucketName
|
||||||
ttls := fc.GetCollectionTtls(collection)
|
ttls := fc.GetCollectionTtls(collection)
|
||||||
if len(ttls) == 0 {
|
|
||||||
|
hasLifecycleXML := entry.Extended != nil && len(entry.Extended[lifecycleXMLKey]) > 0
|
||||||
|
versioningStatus := ""
|
||||||
|
if entry.Extended != nil {
|
||||||
|
versioningStatus = string(entry.Extended[s3_constants.ExtVersioningKey])
|
||||||
|
}
|
||||||
|
|
||||||
|
ruleCount := int64(len(ttls))
|
||||||
|
if !hasLifecycleXML && ruleCount == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.V(2).Infof("s3_lifecycle: bucket %s has %d lifecycle rule(s)", bucketName, len(ttls))
|
glog.V(2).Infof("s3_lifecycle: bucket %s has %d TTL rule(s), lifecycle_xml=%v, versioning=%s",
|
||||||
|
bucketName, ruleCount, hasLifecycleXML, versioningStatus)
|
||||||
|
|
||||||
proposal := &plugin_pb.JobProposal{
|
proposal := &plugin_pb.JobProposal{
|
||||||
ProposalId: fmt.Sprintf("s3_lifecycle:%s", bucketName),
|
ProposalId: fmt.Sprintf("s3_lifecycle:%s", bucketName),
|
||||||
JobType: jobType,
|
JobType: jobType,
|
||||||
Summary: fmt.Sprintf("Lifecycle management for bucket %s (%d rules)", bucketName, len(ttls)),
|
Summary: fmt.Sprintf("Lifecycle management for bucket %s", bucketName),
|
||||||
DedupeKey: fmt.Sprintf("s3_lifecycle:%s", bucketName),
|
DedupeKey: fmt.Sprintf("s3_lifecycle:%s", bucketName),
|
||||||
Parameters: map[string]*plugin_pb.ConfigValue{
|
Parameters: map[string]*plugin_pb.ConfigValue{
|
||||||
"bucket": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketName}},
|
"bucket": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketName}},
|
||||||
"buckets_path": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketsPath}},
|
"buckets_path": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketsPath}},
|
||||||
"collection": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: collection}},
|
"collection": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: collection}},
|
||||||
"rule_count": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(len(ttls))}},
|
"rule_count": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: ruleCount}},
|
||||||
|
"has_lifecycle_xml": {Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: hasLifecycleXML}},
|
||||||
|
"versioning_status": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: versioningStatus}},
|
||||||
},
|
},
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"bucket": bucketName,
|
"bucket": bucketName,
|
||||||
|
|||||||
132
weed/plugin/worker/lifecycle/detection_test.go
Normal file
132
weed/plugin/worker/lifecycle/detection_test.go
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
package lifecycle
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBucketHasLifecycleXML(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
extended map[string][]byte
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "has_lifecycle_xml",
|
||||||
|
extended: map[string][]byte{lifecycleXMLKey: []byte("<LifecycleConfiguration/>")},
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty_lifecycle_xml",
|
||||||
|
extended: map[string][]byte{lifecycleXMLKey: {}},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no_lifecycle_xml",
|
||||||
|
extended: map[string][]byte{"other-key": []byte("value")},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nil_extended",
|
||||||
|
extended: nil,
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := tt.extended != nil && len(tt.extended[lifecycleXMLKey]) > 0
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("hasLifecycleXML = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBucketVersioningStatus(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
extended map[string][]byte
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "versioning_enabled",
|
||||||
|
extended: map[string][]byte{
|
||||||
|
s3_constants.ExtVersioningKey: []byte("Enabled"),
|
||||||
|
},
|
||||||
|
want: "Enabled",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "versioning_suspended",
|
||||||
|
extended: map[string][]byte{
|
||||||
|
s3_constants.ExtVersioningKey: []byte("Suspended"),
|
||||||
|
},
|
||||||
|
want: "Suspended",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no_versioning",
|
||||||
|
extended: map[string][]byte{},
|
||||||
|
want: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nil_extended",
|
||||||
|
extended: nil,
|
||||||
|
want: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
var got string
|
||||||
|
if tt.extended != nil {
|
||||||
|
got = string(tt.extended[s3_constants.ExtVersioningKey])
|
||||||
|
}
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("versioningStatus = %q, want %q", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectionProposalParameters(t *testing.T) {
|
||||||
|
// Verify that bucket entries with lifecycle XML or TTL rules produce
|
||||||
|
// proposals with the expected parameters.
|
||||||
|
t.Run("bucket_with_lifecycle_xml_and_versioning", func(t *testing.T) {
|
||||||
|
entry := &filer_pb.Entry{
|
||||||
|
Name: "my-bucket",
|
||||||
|
IsDirectory: true,
|
||||||
|
Extended: map[string][]byte{
|
||||||
|
lifecycleXMLKey: []byte(`<LifecycleConfiguration><Rule><Status>Enabled</Status></Rule></LifecycleConfiguration>`),
|
||||||
|
s3_constants.ExtVersioningKey: []byte("Enabled"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
hasXML := entry.Extended != nil && len(entry.Extended[lifecycleXMLKey]) > 0
|
||||||
|
versioning := ""
|
||||||
|
if entry.Extended != nil {
|
||||||
|
versioning = string(entry.Extended[s3_constants.ExtVersioningKey])
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasXML {
|
||||||
|
t.Error("expected hasLifecycleXML=true")
|
||||||
|
}
|
||||||
|
if versioning != "Enabled" {
|
||||||
|
t.Errorf("expected versioning=Enabled, got %q", versioning)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("bucket_without_lifecycle_or_ttl_is_skipped", func(t *testing.T) {
|
||||||
|
entry := &filer_pb.Entry{
|
||||||
|
Name: "empty-bucket",
|
||||||
|
IsDirectory: true,
|
||||||
|
Extended: map[string][]byte{},
|
||||||
|
}
|
||||||
|
|
||||||
|
hasXML := entry.Extended != nil && len(entry.Extended[lifecycleXMLKey]) > 0
|
||||||
|
ttlCount := 0 // simulated: no TTL rules in filer.conf
|
||||||
|
|
||||||
|
if hasXML || ttlCount > 0 {
|
||||||
|
t.Error("expected bucket to be skipped (no lifecycle XML, no TTLs)")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user