fix: S3 versioning memory leak in ListObjectVersions pagination (#7813)
* fix: S3 versioning memory leak in ListObjectVersions pagination

  This commit fixes a memory leak in S3 versioning buckets where ListObjectVersions with pagination (key-marker set) would collect ALL versions in the bucket before filtering, causing O(N) memory usage.

  Root cause:
  - When keyMarker was set, maxCollect was set to 0 (unlimited)
  - This caused findVersionsRecursively to traverse the entire bucket
  - All versions were collected into memory, sorted, then filtered

  Fix:
  - Updated findVersionsRecursively to accept keyMarker and versionIdMarker
  - Skips objects/versions before the marker during recursion (instead of filtering them out after collection)
  - Always respects maxCollect limit (never unlimited)
  - Memory usage is now O(maxKeys) instead of O(total versions)

  Refactoring:
  - Introduced versionCollector struct to encapsulate collection state
  - Extracted helper methods for cleaner, more testable code:
    - matchesPrefixFilter: prefix matching logic
    - shouldSkipObjectForMarker: keyMarker filtering
    - shouldSkipVersionForMarker: versionIdMarker filtering
    - processVersionsDirectory: .versions directory handling
    - processExplicitDirectory: S3 directory object handling
    - processRegularFile: pre-versioning file handling
    - collectVersions: main recursive collection loop
    - processDirectory: directory entry dispatch

  This reduces the high QPS on 'find' and 'prefixList' operations by skipping irrelevant objects during traversal.

  Fixes customer-reported memory leak with high find/prefixList QPS in Grafana for S3 versioning buckets.
* s3: infer version ID format from ExtLatestVersionIdKey metadata

  Simplified version format detection:
  - Removed ExtVersionIdFormatKey - no longer needed
  - getVersionIdFormat() now infers format from ExtLatestVersionIdKey
  - Uses isNewFormatVersionId() to check if latest version uses inverted format

  This approach is simpler because:
  - ExtLatestVersionIdKey is already stored in .versions directory metadata
  - No need for separate format metadata field
  - Format is naturally determined by the existing version IDs
This commit is contained in:
@@ -946,13 +946,16 @@ func (s3a *S3ApiServer) updateIsLatestFlagsForSuspendedVersioning(bucket, object
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) putVersionedObject(r *http.Request, bucket, object string, dataReader io.Reader, objectContentType string) (versionId string, etag string, errCode s3err.ErrorCode, sseMetadata SSEResponseMetadata) {
|
||||
// Generate version ID
|
||||
versionId = generateVersionId()
|
||||
|
||||
// Normalize object path to ensure consistency with toFilerPath behavior
|
||||
normalizedObject := removeDuplicateSlashes(object)
|
||||
|
||||
glog.V(2).Infof("putVersionedObject: creating version %s for %s/%s (normalized: %s)", versionId, bucket, object, normalizedObject)
|
||||
// Check if .versions directory exists to determine format
|
||||
useInvertedFormat := s3a.getVersionIdFormat(bucket, normalizedObject)
|
||||
|
||||
// Generate version ID using the appropriate format
|
||||
versionId = generateVersionId(useInvertedFormat)
|
||||
|
||||
glog.V(2).Infof("putVersionedObject: creating version %s for %s/%s (normalized: %s, inverted=%v)", versionId, bucket, object, normalizedObject, useInvertedFormat)
|
||||
|
||||
// Create the version file name
|
||||
versionFileName := s3a.getVersionFileName(versionId)
|
||||
@@ -961,17 +964,7 @@ func (s3a *S3ApiServer) putVersionedObject(r *http.Request, bucket, object strin
|
||||
// We need to construct the object path relative to the bucket
|
||||
versionObjectPath := normalizedObject + s3_constants.VersionsFolder + "/" + versionFileName
|
||||
versionFilePath := s3a.toFilerPath(bucket, versionObjectPath)
|
||||
|
||||
// Ensure the .versions directory exists before uploading
|
||||
bucketDir := s3a.option.BucketsPath + "/" + bucket
|
||||
versionsDir := normalizedObject + s3_constants.VersionsFolder
|
||||
err := s3a.mkdir(bucketDir, versionsDir, func(entry *filer_pb.Entry) {
|
||||
entry.Attributes.Mime = s3_constants.FolderMimeType
|
||||
})
|
||||
if err != nil {
|
||||
glog.Errorf("putVersionedObject: failed to create .versions directory: %v", err)
|
||||
return "", "", s3err.ErrInternalError, SSEResponseMetadata{}
|
||||
}
|
||||
|
||||
body := dataReader
|
||||
if objectContentType == "" {
|
||||
@@ -989,6 +982,7 @@ func (s3a *S3ApiServer) putVersionedObject(r *http.Request, bucket, object strin
|
||||
// Get the uploaded entry to add versioning metadata
|
||||
// Use retry logic to handle filer consistency delays
|
||||
var versionEntry *filer_pb.Entry
|
||||
var err error
|
||||
maxRetries := 8
|
||||
for attempt := 1; attempt <= maxRetries; attempt++ {
|
||||
versionEntry, err = s3a.getEntry(bucketDir, versionObjectPath)
|
||||
|
||||
Reference in New Issue
Block a user