s3api: make conditional mutations atomic and AWS-compatible (#8802)
* s3api: serialize conditional write finalization * s3api: add conditional delete mutation checks * s3api: enforce destination conditions for copy * s3api: revalidate multipart completion under lock * s3api: rollback failed put finalization hooks * s3api: report delete-marker version deletions * s3api: fix copy destination versioning edge cases * s3api: make versioned multipart completion idempotent * test/s3: cover conditional mutation regressions * s3api: rollback failed copy version finalization * s3api: resolve suspended delete conditions via latest entry * s3api: remove copy test null-version injection * s3api: reject out-of-order multipart completions * s3api: preserve multipart replay version metadata * s3api: surface copy destination existence errors * s3api: simplify delete condition target resolution * test/s3: make conditional delete assertions order independent * test/s3: add distributed lock gateway integration * s3api: fail closed multipart versioned completion * s3api: harden copy metadata and overwrite paths * s3api: create delete markers for suspended deletes * s3api: allow duplicate multipart completion parts
This commit is contained in:
@@ -14,7 +14,6 @@ import (
|
||||
"net/url"
|
||||
"path"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -166,63 +165,73 @@ func copySSEHeadersFromFirstPart(dst *filer_pb.Entry, firstPart *filer_pb.Entry,
|
||||
}
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.CompleteMultipartUploadInput, parts *CompleteMultipartUpload) (output *CompleteMultipartUploadResult, code s3err.ErrorCode) {
|
||||
type multipartPartBoundary struct {
|
||||
PartNumber int `json:"part"`
|
||||
StartChunk int `json:"start"`
|
||||
EndChunk int `json:"end"`
|
||||
ETag string `json:"etag"`
|
||||
}
|
||||
|
||||
glog.V(2).Infof("completeMultipartUpload input %v", input)
|
||||
if len(parts.Parts) == 0 {
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
|
||||
return nil, s3err.ErrNoSuchUpload
|
||||
type multipartCompletionState struct {
|
||||
deleteEntries []*filer_pb.Entry
|
||||
partEntries map[int][]*filer_pb.Entry
|
||||
pentry *filer_pb.Entry
|
||||
mime string
|
||||
finalParts []*filer_pb.FileChunk
|
||||
offset int64
|
||||
partBoundaries []multipartPartBoundary
|
||||
multipartETag string
|
||||
entityWithTtl bool
|
||||
}
|
||||
|
||||
func completeMultipartResult(r *http.Request, input *s3.CompleteMultipartUploadInput, etag string, entry *filer_pb.Entry) *CompleteMultipartUploadResult {
|
||||
result := &CompleteMultipartUploadResult{
|
||||
Location: aws.String(fmt.Sprintf("%s://%s/%s/%s", getRequestScheme(r), r.Host, url.PathEscape(*input.Bucket), urlPathEscape(*input.Key))),
|
||||
Bucket: input.Bucket,
|
||||
ETag: aws.String(etag),
|
||||
Key: objectKey(input.Key),
|
||||
}
|
||||
completedPartNumbers := []int{}
|
||||
completedPartMap := make(map[int][]string)
|
||||
|
||||
maxPartNo := 1
|
||||
|
||||
for _, part := range parts.Parts {
|
||||
if _, ok := completedPartMap[part.PartNumber]; !ok {
|
||||
completedPartNumbers = append(completedPartNumbers, part.PartNumber)
|
||||
}
|
||||
completedPartMap[part.PartNumber] = append(completedPartMap[part.PartNumber], part.ETag)
|
||||
maxPartNo = maxInt(maxPartNo, part.PartNumber)
|
||||
}
|
||||
sort.Ints(completedPartNumbers)
|
||||
|
||||
uploadDirectory := s3a.genUploadsFolder(*input.Bucket) + "/" + *input.UploadId
|
||||
// Use explicit limit to ensure all parts are listed (up to S3's max of 10,000 parts)
|
||||
// Previously limit=0 relied on server's DirListingLimit default (1000 in weed server mode),
|
||||
// which caused CompleteMultipartUpload to fail for uploads with more than 1000 parts.
|
||||
entries, _, err := s3a.list(uploadDirectory, "", "", false, s3_constants.MaxS3MultipartParts+1)
|
||||
if err != nil {
|
||||
glog.Errorf("completeMultipartUpload %s %s error: %v, entries:%d", *input.Bucket, *input.UploadId, err, len(entries))
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
|
||||
return nil, s3err.ErrNoSuchUpload
|
||||
}
|
||||
|
||||
if len(entries) == 0 {
|
||||
entryName, dirName := s3a.getEntryNameAndDir(input)
|
||||
if entry, _ := s3a.getEntry(dirName, entryName); entry != nil && entry.Extended != nil {
|
||||
if uploadId, ok := entry.Extended[s3_constants.SeaweedFSUploadId]; ok && *input.UploadId == string(uploadId) {
|
||||
// Location uses the S3 endpoint that the client connected to
|
||||
// Format: scheme://s3-endpoint/bucket/object (following AWS S3 API)
|
||||
return &CompleteMultipartUploadResult{
|
||||
Location: aws.String(fmt.Sprintf("%s://%s/%s/%s", getRequestScheme(r), r.Host, url.PathEscape(*input.Bucket), urlPathEscape(*input.Key))),
|
||||
Bucket: input.Bucket,
|
||||
ETag: aws.String(getEtagFromEntry(entry)),
|
||||
Key: objectKey(input.Key),
|
||||
}, s3err.ErrNone
|
||||
if entry != nil && entry.Extended != nil {
|
||||
if versionIdBytes, ok := entry.Extended[s3_constants.ExtVersionIdKey]; ok {
|
||||
versionId := string(versionIdBytes)
|
||||
if versionId != "" && versionId != "null" {
|
||||
result.VersionId = aws.String(versionId)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) prepareMultipartCompletionState(r *http.Request, input *s3.CompleteMultipartUploadInput, uploadDirectory, entryName, dirName string, completedPartNumbers []int, completedPartMap map[int][]string, maxPartNo int) (*multipartCompletionState, *CompleteMultipartUploadResult, s3err.ErrorCode) {
|
||||
if entry, err := s3a.resolveObjectEntry(*input.Bucket, *input.Key); err == nil && entry != nil && entry.Extended != nil {
|
||||
if uploadId, ok := entry.Extended[s3_constants.SeaweedFSUploadId]; ok && *input.UploadId == string(uploadId) {
|
||||
cleanupEntries, _, cleanupErr := s3a.list(uploadDirectory, "", "", false, s3_constants.MaxS3MultipartParts+1)
|
||||
if cleanupErr != nil && !errors.Is(cleanupErr, filer_pb.ErrNotFound) {
|
||||
glog.Warningf("completeMultipartUpload: failed to list stale upload directory %s for cleanup: %v", uploadDirectory, cleanupErr)
|
||||
}
|
||||
return &multipartCompletionState{deleteEntries: cleanupEntries}, completeMultipartResult(r, input, getEtagFromEntry(entry), entry), s3err.ErrNone
|
||||
}
|
||||
}
|
||||
|
||||
entries, _, err := s3a.list(uploadDirectory, "", "", false, s3_constants.MaxS3MultipartParts+1)
|
||||
if err != nil {
|
||||
glog.Errorf("completeMultipartUpload %s %s error: %v", *input.Bucket, *input.UploadId, err)
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
|
||||
return nil, s3err.ErrNoSuchUpload
|
||||
return nil, nil, s3err.ErrNoSuchUpload
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
|
||||
return nil, nil, s3err.ErrNoSuchUpload
|
||||
}
|
||||
|
||||
pentry, err := s3a.getEntry(s3a.genUploadsFolder(*input.Bucket), *input.UploadId)
|
||||
if err != nil {
|
||||
glog.Errorf("completeMultipartUpload %s %s error: %v", *input.Bucket, *input.UploadId, err)
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
|
||||
return nil, s3err.ErrNoSuchUpload
|
||||
return nil, nil, s3err.ErrNoSuchUpload
|
||||
}
|
||||
deleteEntries := []*filer_pb.Entry{}
|
||||
|
||||
deleteEntries := make([]*filer_pb.Entry, 0)
|
||||
partEntries := make(map[int][]*filer_pb.Entry, len(entries))
|
||||
entityTooSmall := false
|
||||
entityWithTtl := false
|
||||
@@ -232,10 +241,10 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
if entry.IsDirectory || !strings.HasSuffix(entry.Name, multipartExt) {
|
||||
continue
|
||||
}
|
||||
partNumber, err := parsePartNumber(entry.Name)
|
||||
if err != nil {
|
||||
partNumber, parseErr := parsePartNumber(entry.Name)
|
||||
if parseErr != nil {
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartNumber).Inc()
|
||||
glog.Errorf("completeMultipartUpload failed to pasre partNumber %s:%s", entry.Name, err)
|
||||
glog.Errorf("completeMultipartUpload failed to parse partNumber %s:%s", entry.Name, parseErr)
|
||||
continue
|
||||
}
|
||||
completedPartsByNumber, ok := completedPartMap[partNumber]
|
||||
@@ -259,7 +268,6 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartEmpty).Inc()
|
||||
continue
|
||||
}
|
||||
//there maybe multi same part, because of client retry
|
||||
partEntries[partNumber] = append(partEntries[partNumber], entry)
|
||||
foundEntry = true
|
||||
}
|
||||
@@ -278,27 +286,23 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
}
|
||||
if entityTooSmall {
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompleteEntityTooSmall).Inc()
|
||||
return nil, s3err.ErrEntityTooSmall
|
||||
return nil, nil, s3err.ErrEntityTooSmall
|
||||
}
|
||||
mime := pentry.Attributes.Mime
|
||||
var finalParts []*filer_pb.FileChunk
|
||||
var offset int64
|
||||
|
||||
// Track part boundaries for later retrieval with PartNumber parameter
|
||||
type PartBoundary struct {
|
||||
PartNumber int `json:"part"`
|
||||
StartChunk int `json:"start"`
|
||||
EndChunk int `json:"end"` // exclusive
|
||||
ETag string `json:"etag"`
|
||||
mime := ""
|
||||
if pentry.Attributes != nil {
|
||||
mime = pentry.Attributes.Mime
|
||||
}
|
||||
var partBoundaries []PartBoundary
|
||||
finalParts := make([]*filer_pb.FileChunk, 0)
|
||||
partBoundaries := make([]multipartPartBoundary, 0, len(completedPartNumbers))
|
||||
var offset int64
|
||||
|
||||
for _, partNumber := range completedPartNumbers {
|
||||
partEntriesByNumber, ok := partEntries[partNumber]
|
||||
if !ok {
|
||||
glog.Errorf("part %d has no entry", partNumber)
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartNotFound).Inc()
|
||||
return nil, s3err.ErrInvalidPart
|
||||
return nil, nil, s3err.ErrInvalidPart
|
||||
}
|
||||
found := false
|
||||
|
||||
@@ -312,20 +316,11 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
continue
|
||||
}
|
||||
|
||||
// Record the start chunk index for this part
|
||||
partStartChunk := len(finalParts)
|
||||
|
||||
// Calculate the part's ETag (for GetObject with PartNumber)
|
||||
partETag := filer.ETag(entry)
|
||||
|
||||
for _, chunk := range entry.GetChunks() {
|
||||
// CRITICAL: Do NOT modify SSE metadata offsets during assembly!
|
||||
// The encrypted data was created with the offset stored in chunk.SseMetadata.
|
||||
// Changing the offset here would cause decryption to fail because CTR mode
|
||||
// uses the offset to initialize the counter. We must decrypt with the same
|
||||
// offset that was used during encryption.
|
||||
|
||||
p := &filer_pb.FileChunk{
|
||||
finalParts = append(finalParts, &filer_pb.FileChunk{
|
||||
FileId: chunk.GetFileIdString(),
|
||||
Offset: offset,
|
||||
Size: chunk.Size,
|
||||
@@ -333,17 +328,14 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
CipherKey: chunk.CipherKey,
|
||||
ETag: chunk.ETag,
|
||||
IsCompressed: chunk.IsCompressed,
|
||||
// Preserve SSE metadata UNCHANGED - do not modify the offset!
|
||||
SseType: chunk.SseType,
|
||||
SseMetadata: chunk.SseMetadata,
|
||||
}
|
||||
finalParts = append(finalParts, p)
|
||||
SseType: chunk.SseType,
|
||||
SseMetadata: chunk.SseMetadata,
|
||||
})
|
||||
offset += int64(chunk.Size)
|
||||
}
|
||||
|
||||
// Record the part boundary
|
||||
partEndChunk := len(finalParts)
|
||||
partBoundaries = append(partBoundaries, PartBoundary{
|
||||
partBoundaries = append(partBoundaries, multipartPartBoundary{
|
||||
PartNumber: partNumber,
|
||||
StartChunk: partStartChunk,
|
||||
EndChunk: partEndChunk,
|
||||
@@ -354,165 +346,218 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
}
|
||||
}
|
||||
|
||||
return &multipartCompletionState{
|
||||
deleteEntries: deleteEntries,
|
||||
partEntries: partEntries,
|
||||
pentry: pentry,
|
||||
mime: mime,
|
||||
finalParts: finalParts,
|
||||
offset: offset,
|
||||
partBoundaries: partBoundaries,
|
||||
multipartETag: calculateMultipartETag(partEntries, completedPartNumbers),
|
||||
entityWithTtl: entityWithTtl,
|
||||
}, nil, s3err.ErrNone
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.CompleteMultipartUploadInput, parts *CompleteMultipartUpload) (output *CompleteMultipartUploadResult, code s3err.ErrorCode) {
|
||||
|
||||
glog.V(2).Infof("completeMultipartUpload input %v", input)
|
||||
if len(parts.Parts) == 0 {
|
||||
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
|
||||
return nil, s3err.ErrNoSuchUpload
|
||||
}
|
||||
completedPartNumbers := []int{}
|
||||
completedPartMap := make(map[int][]string)
|
||||
|
||||
maxPartNo := 1
|
||||
lastSeenPartNo := 0
|
||||
|
||||
for _, part := range parts.Parts {
|
||||
if part.PartNumber < lastSeenPartNo {
|
||||
return nil, s3err.ErrInvalidPartOrder
|
||||
}
|
||||
lastSeenPartNo = part.PartNumber
|
||||
if _, ok := completedPartMap[part.PartNumber]; !ok {
|
||||
completedPartNumbers = append(completedPartNumbers, part.PartNumber)
|
||||
}
|
||||
completedPartMap[part.PartNumber] = append(completedPartMap[part.PartNumber], part.ETag)
|
||||
maxPartNo = maxInt(maxPartNo, part.PartNumber)
|
||||
}
|
||||
|
||||
uploadDirectory := s3a.genUploadsFolder(*input.Bucket) + "/" + *input.UploadId
|
||||
entryName, dirName := s3a.getEntryNameAndDir(input)
|
||||
|
||||
// Precompute ETag once for consistency across all paths
|
||||
multipartETag := calculateMultipartETag(partEntries, completedPartNumbers)
|
||||
etagQuote := "\"" + multipartETag + "\""
|
||||
|
||||
// Check if versioning is configured for this bucket BEFORE creating any files
|
||||
versioningState, vErr := s3a.getVersioningState(*input.Bucket)
|
||||
if vErr == nil && versioningState == s3_constants.VersioningEnabled {
|
||||
// Use full object key (not just entryName) to ensure correct .versions directory is checked
|
||||
normalizedKey := strings.TrimPrefix(*input.Key, "/")
|
||||
useInvertedFormat := s3a.getVersionIdFormat(*input.Bucket, normalizedKey)
|
||||
versionId := generateVersionId(useInvertedFormat)
|
||||
versionFileName := s3a.getVersionFileName(versionId)
|
||||
versionDir := dirName + "/" + entryName + s3_constants.VersionsFolder
|
||||
|
||||
// Capture timestamp and owner once for consistency between version entry and cache entry
|
||||
versionMtime := time.Now().Unix()
|
||||
amzAccountId := r.Header.Get(s3_constants.AmzAccountId)
|
||||
|
||||
// Create the version file in the .versions directory
|
||||
err = s3a.mkFile(versionDir, versionFileName, finalParts, func(versionEntry *filer_pb.Entry) {
|
||||
if versionEntry.Extended == nil {
|
||||
versionEntry.Extended = make(map[string][]byte)
|
||||
}
|
||||
versionEntry.Extended[s3_constants.ExtVersionIdKey] = []byte(versionId)
|
||||
versionEntry.Extended[s3_constants.SeaweedFSUploadId] = []byte(*input.UploadId)
|
||||
// Store parts count for x-amz-mp-parts-count header
|
||||
versionEntry.Extended[s3_constants.SeaweedFSMultipartPartsCount] = []byte(fmt.Sprintf("%d", len(completedPartNumbers)))
|
||||
// Store part boundaries for GetObject with PartNumber
|
||||
if partBoundariesJSON, err := json.Marshal(partBoundaries); err == nil {
|
||||
versionEntry.Extended[s3_constants.SeaweedFSMultipartPartBoundaries] = partBoundariesJSON
|
||||
}
|
||||
|
||||
// Set object owner for versioned multipart objects
|
||||
if amzAccountId != "" {
|
||||
versionEntry.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
}
|
||||
|
||||
for k, v := range pentry.Extended {
|
||||
if k != s3_constants.ExtMultipartObjectKey {
|
||||
versionEntry.Extended[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// Persist ETag to ensure subsequent HEAD/GET uses the same value
|
||||
versionEntry.Extended[s3_constants.ExtETagKey] = []byte(multipartETag)
|
||||
|
||||
// Preserve ALL SSE metadata from the first part (if any)
|
||||
// SSE metadata is stored in individual parts, not the upload directory
|
||||
if len(completedPartNumbers) > 0 && len(partEntries[completedPartNumbers[0]]) > 0 {
|
||||
firstPartEntry := partEntries[completedPartNumbers[0]][0]
|
||||
copySSEHeadersFromFirstPart(versionEntry, firstPartEntry, "versioned")
|
||||
}
|
||||
if pentry.Attributes.Mime != "" {
|
||||
versionEntry.Attributes.Mime = pentry.Attributes.Mime
|
||||
} else if mime != "" {
|
||||
versionEntry.Attributes.Mime = mime
|
||||
}
|
||||
versionEntry.Attributes.FileSize = uint64(offset)
|
||||
versionEntry.Attributes.Mtime = versionMtime
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to create version %s: %v", versionId, err)
|
||||
return nil, s3err.ErrInternalError
|
||||
var completionState *multipartCompletionState
|
||||
finalizeCode := s3a.withObjectWriteLock(*input.Bucket, *input.Key, func() s3err.ErrorCode {
|
||||
return s3a.checkConditionalHeaders(r, *input.Bucket, *input.Key)
|
||||
}, func() s3err.ErrorCode {
|
||||
var prepCode s3err.ErrorCode
|
||||
completionState, output, prepCode = s3a.prepareMultipartCompletionState(r, input, uploadDirectory, entryName, dirName, completedPartNumbers, completedPartMap, maxPartNo)
|
||||
if prepCode != s3err.ErrNone || output != nil {
|
||||
return prepCode
|
||||
}
|
||||
|
||||
// Construct entry with metadata for caching in .versions directory
|
||||
// Reuse versionMtime to keep list vs. HEAD timestamps aligned
|
||||
// multipartETag is precomputed
|
||||
versionEntryForCache := &filer_pb.Entry{
|
||||
Attributes: &filer_pb.FuseAttributes{
|
||||
FileSize: uint64(offset),
|
||||
Mtime: versionMtime,
|
||||
},
|
||||
Extended: map[string][]byte{
|
||||
s3_constants.ExtETagKey: []byte(multipartETag),
|
||||
},
|
||||
}
|
||||
if amzAccountId != "" {
|
||||
versionEntryForCache.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
etagQuote := "\"" + completionState.multipartETag + "\""
|
||||
// Check if versioning is configured for this bucket BEFORE creating any files.
|
||||
versioningState, vErr := s3a.getVersioningState(*input.Bucket)
|
||||
if vErr != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to get versioning state for bucket %s: %v", *input.Bucket, vErr)
|
||||
return s3err.ErrInternalError
|
||||
}
|
||||
if versioningState == s3_constants.VersioningEnabled {
|
||||
// Use full object key (not just entryName) to ensure correct .versions directory is checked
|
||||
normalizedKey := strings.TrimPrefix(*input.Key, "/")
|
||||
useInvertedFormat := s3a.getVersionIdFormat(*input.Bucket, normalizedKey)
|
||||
versionId := generateVersionId(useInvertedFormat)
|
||||
versionFileName := s3a.getVersionFileName(versionId)
|
||||
versionDir := dirName + "/" + entryName + s3_constants.VersionsFolder
|
||||
|
||||
// Update the .versions directory metadata to indicate this is the latest version
|
||||
// Pass entry to cache its metadata for single-scan list efficiency
|
||||
err = s3a.updateLatestVersionInDirectory(*input.Bucket, *input.Key, versionId, versionFileName, versionEntryForCache)
|
||||
if err != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to update latest version in directory: %v", err)
|
||||
return nil, s3err.ErrInternalError
|
||||
}
|
||||
|
||||
// For versioned buckets, all content is stored in .versions directory
|
||||
// The latest version information is tracked in the .versions directory metadata
|
||||
output = &CompleteMultipartUploadResult{
|
||||
Location: aws.String(fmt.Sprintf("%s://%s/%s/%s", getRequestScheme(r), r.Host, url.PathEscape(*input.Bucket), urlPathEscape(*input.Key))),
|
||||
Bucket: input.Bucket,
|
||||
ETag: aws.String(etagQuote),
|
||||
Key: objectKey(input.Key),
|
||||
VersionId: aws.String(versionId),
|
||||
}
|
||||
} else if vErr == nil && versioningState == s3_constants.VersioningSuspended {
|
||||
// For suspended versioning, add "null" version ID metadata and return "null" version ID
|
||||
err = s3a.mkFile(dirName, entryName, finalParts, func(entry *filer_pb.Entry) {
|
||||
if entry.Extended == nil {
|
||||
entry.Extended = make(map[string][]byte)
|
||||
}
|
||||
entry.Extended[s3_constants.ExtVersionIdKey] = []byte("null")
|
||||
// Store parts count for x-amz-mp-parts-count header
|
||||
entry.Extended[s3_constants.SeaweedFSMultipartPartsCount] = []byte(fmt.Sprintf("%d", len(completedPartNumbers)))
|
||||
// Store part boundaries for GetObject with PartNumber
|
||||
if partBoundariesJSON, jsonErr := json.Marshal(partBoundaries); jsonErr == nil {
|
||||
entry.Extended[s3_constants.SeaweedFSMultipartPartBoundaries] = partBoundariesJSON
|
||||
}
|
||||
|
||||
// Set object owner for suspended versioning multipart objects
|
||||
// Capture timestamp and owner once for consistency between version entry and cache entry
|
||||
versionMtime := time.Now().Unix()
|
||||
amzAccountId := r.Header.Get(s3_constants.AmzAccountId)
|
||||
if amzAccountId != "" {
|
||||
entry.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
}
|
||||
|
||||
for k, v := range pentry.Extended {
|
||||
if k != s3_constants.ExtMultipartObjectKey {
|
||||
entry.Extended[k] = v
|
||||
// Create the version file in the .versions directory
|
||||
if err := s3a.mkFile(versionDir, versionFileName, completionState.finalParts, func(versionEntry *filer_pb.Entry) {
|
||||
if versionEntry.Extended == nil {
|
||||
versionEntry.Extended = make(map[string][]byte)
|
||||
}
|
||||
versionEntry.Extended[s3_constants.ExtVersionIdKey] = []byte(versionId)
|
||||
versionEntry.Extended[s3_constants.SeaweedFSUploadId] = []byte(*input.UploadId)
|
||||
// Store parts count for x-amz-mp-parts-count header
|
||||
versionEntry.Extended[s3_constants.SeaweedFSMultipartPartsCount] = []byte(fmt.Sprintf("%d", len(completedPartNumbers)))
|
||||
// Store part boundaries for GetObject with PartNumber
|
||||
if partBoundariesJSON, err := json.Marshal(completionState.partBoundaries); err == nil {
|
||||
versionEntry.Extended[s3_constants.SeaweedFSMultipartPartBoundaries] = partBoundariesJSON
|
||||
}
|
||||
|
||||
// Set object owner for versioned multipart objects
|
||||
if amzAccountId != "" {
|
||||
versionEntry.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
}
|
||||
|
||||
for k, v := range completionState.pentry.Extended {
|
||||
if k != s3_constants.ExtMultipartObjectKey {
|
||||
versionEntry.Extended[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// Persist ETag to ensure subsequent HEAD/GET uses the same value
|
||||
versionEntry.Extended[s3_constants.ExtETagKey] = []byte(completionState.multipartETag)
|
||||
|
||||
// Preserve ALL SSE metadata from the first part (if any)
|
||||
// SSE metadata is stored in individual parts, not the upload directory
|
||||
if len(completedPartNumbers) > 0 && len(completionState.partEntries[completedPartNumbers[0]]) > 0 {
|
||||
firstPartEntry := completionState.partEntries[completedPartNumbers[0]][0]
|
||||
copySSEHeadersFromFirstPart(versionEntry, firstPartEntry, "versioned")
|
||||
}
|
||||
if completionState.pentry.Attributes != nil && completionState.pentry.Attributes.Mime != "" {
|
||||
versionEntry.Attributes.Mime = completionState.pentry.Attributes.Mime
|
||||
} else if completionState.mime != "" {
|
||||
versionEntry.Attributes.Mime = completionState.mime
|
||||
}
|
||||
versionEntry.Attributes.FileSize = uint64(completionState.offset)
|
||||
versionEntry.Attributes.Mtime = versionMtime
|
||||
}); err != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to create version %s: %v", versionId, err)
|
||||
return s3err.ErrInternalError
|
||||
}
|
||||
|
||||
// Preserve ALL SSE metadata from the first part (if any)
|
||||
// SSE metadata is stored in individual parts, not the upload directory
|
||||
if len(completedPartNumbers) > 0 && len(partEntries[completedPartNumbers[0]]) > 0 {
|
||||
firstPartEntry := partEntries[completedPartNumbers[0]][0]
|
||||
copySSEHeadersFromFirstPart(entry, firstPartEntry, "suspended versioning")
|
||||
// Construct entry with metadata for caching in .versions directory
|
||||
// Reuse versionMtime to keep list vs. HEAD timestamps aligned
|
||||
// multipartETag is precomputed
|
||||
versionEntryForCache := &filer_pb.Entry{
|
||||
Attributes: &filer_pb.FuseAttributes{
|
||||
FileSize: uint64(completionState.offset),
|
||||
Mtime: versionMtime,
|
||||
},
|
||||
Extended: map[string][]byte{
|
||||
s3_constants.ExtETagKey: []byte(completionState.multipartETag),
|
||||
},
|
||||
}
|
||||
// Persist ETag to ensure subsequent HEAD/GET uses the same value
|
||||
entry.Extended[s3_constants.ExtETagKey] = []byte(multipartETag)
|
||||
if pentry.Attributes.Mime != "" {
|
||||
entry.Attributes.Mime = pentry.Attributes.Mime
|
||||
} else if mime != "" {
|
||||
entry.Attributes.Mime = mime
|
||||
if amzAccountId != "" {
|
||||
versionEntryForCache.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
}
|
||||
entry.Attributes.FileSize = uint64(offset)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to create suspended versioning object: %v", err)
|
||||
return nil, s3err.ErrInternalError
|
||||
// Update the .versions directory metadata to indicate this is the latest version
|
||||
// Pass entry to cache its metadata for single-scan list efficiency
|
||||
if err := s3a.updateLatestVersionInDirectory(*input.Bucket, *input.Key, versionId, versionFileName, versionEntryForCache); err != nil {
|
||||
if rollbackErr := s3a.rollbackMultipartVersion(versionDir, versionFileName); rollbackErr != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to rollback version %s for %s/%s after latest pointer update error: %v", versionId, *input.Bucket, *input.Key, rollbackErr)
|
||||
}
|
||||
glog.Errorf("completeMultipartUpload: failed to update latest version in directory: %v", err)
|
||||
return s3err.ErrInternalError
|
||||
}
|
||||
|
||||
// For versioned buckets, all content is stored in .versions directory
|
||||
// The latest version information is tracked in the .versions directory metadata
|
||||
output = &CompleteMultipartUploadResult{
|
||||
Location: aws.String(fmt.Sprintf("%s://%s/%s/%s", getRequestScheme(r), r.Host, url.PathEscape(*input.Bucket), urlPathEscape(*input.Key))),
|
||||
Bucket: input.Bucket,
|
||||
ETag: aws.String(etagQuote),
|
||||
Key: objectKey(input.Key),
|
||||
VersionId: aws.String(versionId),
|
||||
}
|
||||
return s3err.ErrNone
|
||||
}
|
||||
|
||||
// Note: Suspended versioning should NOT return VersionId field according to AWS S3 spec
|
||||
output = &CompleteMultipartUploadResult{
|
||||
Location: aws.String(fmt.Sprintf("%s://%s/%s/%s", getRequestScheme(r), r.Host, url.PathEscape(*input.Bucket), urlPathEscape(*input.Key))),
|
||||
Bucket: input.Bucket,
|
||||
ETag: aws.String(etagQuote),
|
||||
Key: objectKey(input.Key),
|
||||
// VersionId field intentionally omitted for suspended versioning
|
||||
if versioningState == s3_constants.VersioningSuspended {
|
||||
// For suspended versioning, add "null" version ID metadata and return "null" version ID
|
||||
if err := s3a.mkFile(dirName, entryName, completionState.finalParts, func(entry *filer_pb.Entry) {
|
||||
if entry.Extended == nil {
|
||||
entry.Extended = make(map[string][]byte)
|
||||
}
|
||||
entry.Extended[s3_constants.ExtVersionIdKey] = []byte("null")
|
||||
entry.Extended[s3_constants.SeaweedFSUploadId] = []byte(*input.UploadId)
|
||||
// Store parts count for x-amz-mp-parts-count header
|
||||
entry.Extended[s3_constants.SeaweedFSMultipartPartsCount] = []byte(fmt.Sprintf("%d", len(completedPartNumbers)))
|
||||
// Store part boundaries for GetObject with PartNumber
|
||||
if partBoundariesJSON, jsonErr := json.Marshal(completionState.partBoundaries); jsonErr == nil {
|
||||
entry.Extended[s3_constants.SeaweedFSMultipartPartBoundaries] = partBoundariesJSON
|
||||
}
|
||||
|
||||
// Set object owner for suspended versioning multipart objects
|
||||
amzAccountId := r.Header.Get(s3_constants.AmzAccountId)
|
||||
if amzAccountId != "" {
|
||||
entry.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
}
|
||||
|
||||
for k, v := range completionState.pentry.Extended {
|
||||
if k != s3_constants.ExtMultipartObjectKey {
|
||||
entry.Extended[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// Preserve ALL SSE metadata from the first part (if any)
|
||||
// SSE metadata is stored in individual parts, not the upload directory
|
||||
if len(completedPartNumbers) > 0 && len(completionState.partEntries[completedPartNumbers[0]]) > 0 {
|
||||
firstPartEntry := completionState.partEntries[completedPartNumbers[0]][0]
|
||||
copySSEHeadersFromFirstPart(entry, firstPartEntry, "suspended versioning")
|
||||
}
|
||||
// Persist ETag to ensure subsequent HEAD/GET uses the same value
|
||||
entry.Extended[s3_constants.ExtETagKey] = []byte(completionState.multipartETag)
|
||||
if completionState.pentry.Attributes != nil && completionState.pentry.Attributes.Mime != "" {
|
||||
entry.Attributes.Mime = completionState.pentry.Attributes.Mime
|
||||
} else if completionState.mime != "" {
|
||||
entry.Attributes.Mime = completionState.mime
|
||||
}
|
||||
entry.Attributes.FileSize = uint64(completionState.offset)
|
||||
}); err != nil {
|
||||
glog.Errorf("completeMultipartUpload: failed to create suspended versioning object: %v", err)
|
||||
return s3err.ErrInternalError
|
||||
}
|
||||
|
||||
// Note: Suspended versioning should NOT return VersionId field according to AWS S3 spec
|
||||
output = &CompleteMultipartUploadResult{
|
||||
Location: aws.String(fmt.Sprintf("%s://%s/%s/%s", getRequestScheme(r), r.Host, url.PathEscape(*input.Bucket), urlPathEscape(*input.Key))),
|
||||
Bucket: input.Bucket,
|
||||
ETag: aws.String(etagQuote),
|
||||
Key: objectKey(input.Key),
|
||||
// VersionId field intentionally omitted for suspended versioning
|
||||
}
|
||||
return s3err.ErrNone
|
||||
}
|
||||
} else {
|
||||
|
||||
// For non-versioned buckets, create main object file
|
||||
err = s3a.mkFile(dirName, entryName, finalParts, func(entry *filer_pb.Entry) {
|
||||
if err := s3a.mkFile(dirName, entryName, completionState.finalParts, func(entry *filer_pb.Entry) {
|
||||
if entry.Extended == nil {
|
||||
entry.Extended = make(map[string][]byte)
|
||||
}
|
||||
@@ -520,7 +565,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
// Store parts count for x-amz-mp-parts-count header
|
||||
entry.Extended[s3_constants.SeaweedFSMultipartPartsCount] = []byte(fmt.Sprintf("%d", len(completedPartNumbers)))
|
||||
// Store part boundaries for GetObject with PartNumber
|
||||
if partBoundariesJSON, err := json.Marshal(partBoundaries); err == nil {
|
||||
if partBoundariesJSON, err := json.Marshal(completionState.partBoundaries); err == nil {
|
||||
entry.Extended[s3_constants.SeaweedFSMultipartPartBoundaries] = partBoundariesJSON
|
||||
}
|
||||
|
||||
@@ -530,7 +575,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
entry.Extended[s3_constants.ExtAmzOwnerKey] = []byte(amzAccountId)
|
||||
}
|
||||
|
||||
for k, v := range pentry.Extended {
|
||||
for k, v := range completionState.pentry.Extended {
|
||||
if k != s3_constants.ExtMultipartObjectKey {
|
||||
entry.Extended[k] = v
|
||||
}
|
||||
@@ -538,27 +583,25 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
|
||||
// Preserve ALL SSE metadata from the first part (if any)
|
||||
// SSE metadata is stored in individual parts, not the upload directory
|
||||
if len(completedPartNumbers) > 0 && len(partEntries[completedPartNumbers[0]]) > 0 {
|
||||
firstPartEntry := partEntries[completedPartNumbers[0]][0]
|
||||
if len(completedPartNumbers) > 0 && len(completionState.partEntries[completedPartNumbers[0]]) > 0 {
|
||||
firstPartEntry := completionState.partEntries[completedPartNumbers[0]][0]
|
||||
copySSEHeadersFromFirstPart(entry, firstPartEntry, "non-versioned")
|
||||
}
|
||||
// Persist ETag to ensure subsequent HEAD/GET uses the same value
|
||||
entry.Extended[s3_constants.ExtETagKey] = []byte(multipartETag)
|
||||
if pentry.Attributes.Mime != "" {
|
||||
entry.Attributes.Mime = pentry.Attributes.Mime
|
||||
} else if mime != "" {
|
||||
entry.Attributes.Mime = mime
|
||||
entry.Extended[s3_constants.ExtETagKey] = []byte(completionState.multipartETag)
|
||||
if completionState.pentry.Attributes != nil && completionState.pentry.Attributes.Mime != "" {
|
||||
entry.Attributes.Mime = completionState.pentry.Attributes.Mime
|
||||
} else if completionState.mime != "" {
|
||||
entry.Attributes.Mime = completionState.mime
|
||||
}
|
||||
entry.Attributes.FileSize = uint64(offset)
|
||||
entry.Attributes.FileSize = uint64(completionState.offset)
|
||||
// Set TTL-based S3 expiry (modification time)
|
||||
if entityWithTtl {
|
||||
if completionState.entityWithTtl {
|
||||
entry.Extended[s3_constants.SeaweedFSExpiresS3] = []byte("true")
|
||||
}
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
}); err != nil {
|
||||
glog.Errorf("completeMultipartUpload %s/%s error: %v", dirName, entryName, err)
|
||||
return nil, s3err.ErrInternalError
|
||||
return s3err.ErrInternalError
|
||||
}
|
||||
|
||||
// For non-versioned buckets, return response without VersionId
|
||||
@@ -568,21 +611,30 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
|
||||
ETag: aws.String(etagQuote),
|
||||
Key: objectKey(input.Key),
|
||||
}
|
||||
return s3err.ErrNone
|
||||
})
|
||||
if finalizeCode != s3err.ErrNone {
|
||||
return nil, finalizeCode
|
||||
}
|
||||
|
||||
for _, deleteEntry := range deleteEntries {
|
||||
//delete unused part data
|
||||
if err = s3a.rm(uploadDirectory, deleteEntry.Name, true, true); err != nil {
|
||||
glog.Warningf("completeMultipartUpload cleanup %s upload %s unused %s : %v", *input.Bucket, *input.UploadId, deleteEntry.Name, err)
|
||||
if completionState != nil {
|
||||
for _, deleteEntry := range completionState.deleteEntries {
|
||||
if err := s3a.rm(uploadDirectory, deleteEntry.Name, true, true); err != nil {
|
||||
glog.Warningf("completeMultipartUpload cleanup %s upload %s unused %s : %v", *input.Bucket, *input.UploadId, deleteEntry.Name, err)
|
||||
}
|
||||
}
|
||||
if err := s3a.rm(s3a.genUploadsFolder(*input.Bucket), *input.UploadId, false, true); err != nil {
|
||||
glog.V(1).Infof("completeMultipartUpload cleanup %s upload %s: %v", *input.Bucket, *input.UploadId, err)
|
||||
}
|
||||
}
|
||||
if err = s3a.rm(s3a.genUploadsFolder(*input.Bucket), *input.UploadId, false, true); err != nil {
|
||||
glog.V(1).Infof("completeMultipartUpload cleanup %s upload %s: %v", *input.Bucket, *input.UploadId, err)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) rollbackMultipartVersion(versionDir, versionFileName string) error {
|
||||
return s3a.rmObject(versionDir, versionFileName, true, false)
|
||||
}
|
||||
|
||||
func (s3a *S3ApiServer) getEntryNameAndDir(input *s3.CompleteMultipartUploadInput) (string, string) {
|
||||
entryName := path.Base(*input.Key)
|
||||
dirName := path.Dir(*input.Key)
|
||||
|
||||
Reference in New Issue
Block a user