* lifecycle worker: add NoncurrentVersionExpiration support Add version-aware scanning to the rule-based execution path. When the walker encounters a .versions directory, processVersionsDirectory(): - Lists all version entries (v_<versionId>) - Sorts by version timestamp (newest first) - Walks non-current versions with ShouldExpireNoncurrentVersion() which handles both NoncurrentDays and NewerNoncurrentVersions - Extracts successor time from version IDs (both old/new format) - Skips delete markers in noncurrent version counting - Falls back to entry Mtime when version ID timestamp is unavailable Helper functions: - sortVersionsByTimestamp: insertion sort by version ID timestamp - getEntryVersionTimestamp: extracts timestamp with Mtime fallback * lifecycle worker: address review feedback for noncurrent versions - Use sentinel errLimitReached in versions directory handler - Set NoncurrentIndex on ObjectInfo for proper NewerNoncurrentVersions evaluation * lifecycle worker: fail closed on XML parse error, guard zero Mtime - Fail closed when lifecycle XML exists but fails to parse, instead of falling back to TTL which could apply broader rules - Guard Mtime > 0 before using time.Unix(mtime, 0) to avoid mapping unset Mtime to 1970, which would misorder versions and cause premature expiration * lifecycle worker: count delete markers toward NoncurrentIndex Noncurrent delete markers should count toward the NewerNoncurrentVersions retention threshold so data versions get the correct position index. Previously, skipping delete markers without incrementing the index could retain too many versions after delete/recreate cycles. * lifecycle worker: fix version ordering, error propagation, and fail-closed scope 1. Use full version ID comparison (CompareVersionIds) for sorting .versions entries, not just decoded timestamps. 
Two versions with the same timestamp prefix but different random suffixes were previously misordered, potentially treating the newest version as noncurrent and deleting it. 2. Propagate .versions listing failures to the caller instead of swallowing them with (nil, 0). Transient filer errors on a .versions directory now surface in the job result. 3. Narrow the fail-closed path to only malformed lifecycle XML (errMalformedLifecycleXML). Transient filer LookupEntry errors now fall back to TTL with a warning, matching the original intent of "fail closed on bad config, not on network blips." * lifecycle worker: only skip .uploads at bucket root * lifecycle worker: sort.Slice, mixed-format test, XML presence tracking - Replace manual insertion sort with sort.Slice in sortVersionsByVersionId - Add TestCompareVersionIdsMixedFormats covering old/new format ordering - Distinguish "no lifecycle XML" (nil) from "XML present but no effective rules" (non-nil empty slice) so buckets with all-disabled rules don't incorrectly fall back to filer.conf TTL expiration * lifecycle worker: guard nil Attributes, use TrimSuffix in test - Guard entry.Attributes != nil before accessing GetFileSize() and Mtime in both listExpiredObjectsByRules and processVersionsDirectory - Use strings.TrimPrefix/TrimSuffix in TestVersionsDirectoryNaming to match the production code pattern * lifecycle worker: skip TTL scan when XML present, fix test assertions - When lifecycle XML is present but has no effective rules, skip object scanning entirely instead of falling back to TTL path - Test sort output against concrete expected names instead of re-using the same comparator as the sort itself --------- Co-authored-by: Copilot <copilot@github.com>
200 lines
6.6 KiB
Go
200 lines
6.6 KiB
Go
package lifecycle
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/xml"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3lifecycle"
|
|
)
|
|
|
|
// lifecycleConfig mirrors the XML structure just enough to parse rules.
|
|
// We define a minimal local struct to avoid importing the s3api package
|
|
// (which would create a circular dependency if s3api ever imports the worker).
|
|
type lifecycleConfig struct {
|
|
XMLName xml.Name `xml:"LifecycleConfiguration"`
|
|
Rules []lifecycleConfigRule `xml:"Rule"`
|
|
}
|
|
|
|
type lifecycleConfigRule struct {
|
|
ID string `xml:"ID"`
|
|
Status string `xml:"Status"`
|
|
Filter lifecycleFilter `xml:"Filter"`
|
|
Prefix string `xml:"Prefix"`
|
|
Expiration lifecycleExpiration `xml:"Expiration"`
|
|
NoncurrentVersionExpiration noncurrentVersionExpiration `xml:"NoncurrentVersionExpiration"`
|
|
AbortIncompleteMultipartUpload abortMPU `xml:"AbortIncompleteMultipartUpload"`
|
|
}
|
|
|
|
type lifecycleFilter struct {
|
|
Prefix string `xml:"Prefix"`
|
|
Tag lifecycleTag `xml:"Tag"`
|
|
And lifecycleAnd `xml:"And"`
|
|
ObjectSizeGreaterThan int64 `xml:"ObjectSizeGreaterThan"`
|
|
ObjectSizeLessThan int64 `xml:"ObjectSizeLessThan"`
|
|
}
|
|
|
|
type lifecycleAnd struct {
|
|
Prefix string `xml:"Prefix"`
|
|
Tags []lifecycleTag `xml:"Tag"`
|
|
ObjectSizeGreaterThan int64 `xml:"ObjectSizeGreaterThan"`
|
|
ObjectSizeLessThan int64 `xml:"ObjectSizeLessThan"`
|
|
}
|
|
|
|
// lifecycleTag is one decoded <Tag> element: a key/value pair of strings.
type lifecycleTag struct {
	// Key is the text of the <Key> child.
	Key string `xml:"Key"`

	// Value is the text of the <Value> child.
	Value string `xml:"Value"`
}
|
|
|
|
// lifecycleExpiration is the decoded <Expiration> action of a rule.
type lifecycleExpiration struct {
	// Days and Date are kept exactly as written in the XML. Date stays a raw
	// string here; parseLifecycleXML hands it to parseExpirationDate.
	Days int    `xml:"Days"`
	Date string `xml:"Date"`

	// ExpiredObjectDeleteMarker mirrors the element of the same name.
	ExpiredObjectDeleteMarker bool `xml:"ExpiredObjectDeleteMarker"`
}
|
|
|
|
// noncurrentVersionExpiration is the decoded <NoncurrentVersionExpiration>
// action of a rule. Both values are plain integers taken from the XML and
// are zero when the matching element is absent.
type noncurrentVersionExpiration struct {
	// NoncurrentDays is the value of the <NoncurrentDays> child.
	NoncurrentDays int `xml:"NoncurrentDays"`

	// NewerNoncurrentVersions is the value of the <NewerNoncurrentVersions>
	// child.
	NewerNoncurrentVersions int `xml:"NewerNoncurrentVersions"`
}
|
|
|
|
// abortMPU is the decoded <AbortIncompleteMultipartUpload> action of a rule.
type abortMPU struct {
	// DaysAfterInitiation is the value of the <DaysAfterInitiation> child;
	// it is zero when the element is absent.
	DaysAfterInitiation int `xml:"DaysAfterInitiation"`
}
|
|
|
|
// errMalformedLifecycleXML is the sentinel wrapped into the error returned
// when a bucket carries lifecycle XML that cannot be parsed. Callers are
// expected to detect it with errors.Is and fail closed, i.e. not fall back
// to TTL handling, so that a broken configuration never results in broader
// deletions.
var errMalformedLifecycleXML = errors.New("malformed lifecycle XML")
|
|
|
|
// loadLifecycleRulesFromBucket reads the lifecycle XML from a bucket's
|
|
// metadata and converts it to evaluator-friendly rules.
|
|
//
|
|
// Returns:
|
|
// - (rules, nil) when lifecycle XML is configured and parseable
|
|
// - (nil, nil) when no lifecycle XML is configured (caller should use TTL fallback)
|
|
// - (nil, errMalformedLifecycleXML) when XML exists but is malformed (fail closed)
|
|
// - (nil, err) for transient filer errors (caller should use TTL fallback with warning)
|
|
func loadLifecycleRulesFromBucket(
|
|
ctx context.Context,
|
|
client filer_pb.SeaweedFilerClient,
|
|
bucketsPath, bucket string,
|
|
) ([]s3lifecycle.Rule, error) {
|
|
bucketDir := bucketsPath
|
|
resp, err := filer_pb.LookupEntry(ctx, client, &filer_pb.LookupDirectoryEntryRequest{
|
|
Directory: bucketDir,
|
|
Name: bucket,
|
|
})
|
|
if err != nil {
|
|
// Transient filer error — not the same as malformed XML.
|
|
return nil, fmt.Errorf("lookup bucket %s: %w", bucket, err)
|
|
}
|
|
if resp.Entry == nil || resp.Entry.Extended == nil {
|
|
return nil, nil
|
|
}
|
|
xmlData := resp.Entry.Extended[lifecycleXMLKey]
|
|
if len(xmlData) == 0 {
|
|
return nil, nil
|
|
}
|
|
rules, parseErr := parseLifecycleXML(xmlData)
|
|
if parseErr != nil {
|
|
return nil, fmt.Errorf("%w: bucket %s: %v", errMalformedLifecycleXML, bucket, parseErr)
|
|
}
|
|
// Return non-nil empty slice when XML was present but yielded no rules
|
|
// (e.g., all rules disabled). This lets callers distinguish "no XML" (nil)
|
|
// from "XML present, no effective rules" (empty slice).
|
|
if rules == nil {
|
|
rules = []s3lifecycle.Rule{}
|
|
}
|
|
return rules, nil
|
|
}
|
|
|
|
// parseLifecycleXML parses lifecycle configuration XML and converts it
|
|
// to evaluator-friendly rules.
|
|
func parseLifecycleXML(data []byte) ([]s3lifecycle.Rule, error) {
|
|
var config lifecycleConfig
|
|
if err := xml.NewDecoder(bytes.NewReader(data)).Decode(&config); err != nil {
|
|
return nil, fmt.Errorf("decode lifecycle XML: %w", err)
|
|
}
|
|
|
|
var rules []s3lifecycle.Rule
|
|
for _, r := range config.Rules {
|
|
rule := s3lifecycle.Rule{
|
|
ID: r.ID,
|
|
Status: r.Status,
|
|
}
|
|
|
|
// Resolve prefix: Filter.And.Prefix > Filter.Prefix > Rule.Prefix
|
|
switch {
|
|
case r.Filter.And.Prefix != "" || len(r.Filter.And.Tags) > 0 ||
|
|
r.Filter.And.ObjectSizeGreaterThan > 0 || r.Filter.And.ObjectSizeLessThan > 0:
|
|
rule.Prefix = r.Filter.And.Prefix
|
|
rule.FilterTags = tagsToMap(r.Filter.And.Tags)
|
|
rule.FilterSizeGreaterThan = r.Filter.And.ObjectSizeGreaterThan
|
|
rule.FilterSizeLessThan = r.Filter.And.ObjectSizeLessThan
|
|
case r.Filter.Tag.Key != "":
|
|
rule.Prefix = r.Filter.Prefix
|
|
rule.FilterTags = map[string]string{r.Filter.Tag.Key: r.Filter.Tag.Value}
|
|
rule.FilterSizeGreaterThan = r.Filter.ObjectSizeGreaterThan
|
|
rule.FilterSizeLessThan = r.Filter.ObjectSizeLessThan
|
|
default:
|
|
if r.Filter.Prefix != "" {
|
|
rule.Prefix = r.Filter.Prefix
|
|
} else {
|
|
rule.Prefix = r.Prefix
|
|
}
|
|
rule.FilterSizeGreaterThan = r.Filter.ObjectSizeGreaterThan
|
|
rule.FilterSizeLessThan = r.Filter.ObjectSizeLessThan
|
|
}
|
|
|
|
rule.ExpirationDays = r.Expiration.Days
|
|
rule.ExpiredObjectDeleteMarker = r.Expiration.ExpiredObjectDeleteMarker
|
|
rule.NoncurrentVersionExpirationDays = r.NoncurrentVersionExpiration.NoncurrentDays
|
|
rule.NewerNoncurrentVersions = r.NoncurrentVersionExpiration.NewerNoncurrentVersions
|
|
rule.AbortMPUDaysAfterInitiation = r.AbortIncompleteMultipartUpload.DaysAfterInitiation
|
|
|
|
// Parse Date if present.
|
|
if r.Expiration.Date != "" {
|
|
// Date may be RFC3339 or ISO 8601 date-only.
|
|
parsed, parseErr := parseExpirationDate(r.Expiration.Date)
|
|
if parseErr != nil {
|
|
glog.V(1).Infof("s3_lifecycle: skipping rule %s: invalid expiration date %q: %v", r.ID, r.Expiration.Date, parseErr)
|
|
continue
|
|
}
|
|
rule.ExpirationDate = parsed
|
|
}
|
|
|
|
rules = append(rules, rule)
|
|
}
|
|
return rules, nil
|
|
}
|
|
|
|
func tagsToMap(tags []lifecycleTag) map[string]string {
|
|
if len(tags) == 0 {
|
|
return nil
|
|
}
|
|
m := make(map[string]string, len(tags))
|
|
for _, t := range tags {
|
|
m[t.Key] = t.Value
|
|
}
|
|
return m
|
|
}
|
|
|
|
// parseExpirationDate converts the text of an <Expiration><Date> element to
// a time.Time. The RFC3339 layout is tried first and the date-only layout
// "2006-01-02" second; the first one that parses wins. If neither matches,
// the zero time is returned together with an "unrecognized date format"
// error that echoes the input.
func parseExpirationDate(s string) (time.Time, error) {
	if stamp, err := time.Parse(time.RFC3339, s); err == nil {
		return stamp, nil
	}
	if day, err := time.Parse("2006-01-02", s); err == nil {
		return day, nil
	}
	return time.Time{}, fmt.Errorf("unrecognized date format: %s", s)
}
|