* fix multipart etag * address comments * clean up * clean up * optimization * address comments * unquoted etag * dedup * upgrade * clean * etag * return quoted tag * quoted etag * debug * s3api: unify ETag retrieval and quoting across handlers Refactor newListEntry to take *S3ApiServer and use getObjectETag, and update setResponseHeaders to use the same logic. This ensures consistent ETags are returned for both listing and direct access. * s3api: implement ListObjects deduplication for versioned buckets Handle duplicate entries between the main path and the .versions directory by prioritizing the latest version when bucket versioning is enabled. * s3api: cleanup stale main file entries during versioned uploads Add explicit deletion of pre-existing "main" files when creating new versions in versioned buckets. This prevents stale entries from appearing in bucket listings and ensures consistency. * s3api: fix cleanup code placement in versioned uploads Correct the placement of rm calls in completeMultipartUpload and putVersionedObject to ensure stale main files are properly deleted during versioned uploads. * s3api: improve getObjectETag fallback for empty ExtETagKey Ensure that when ExtETagKey exists but contains an empty value, the function falls through to MD5/chunk-based calculation instead of returning an empty string. * s3api: fix test files for new newListEntry signature Update test files to use the new newListEntry signature where the first parameter is *S3ApiServer. Created mockS3ApiServer to properly test owner display name lookup functionality. * s3api: use filer.ETag for consistent Md5 handling in getEtagFromEntry Change getEtagFromEntry fallback to use filer.ETag(entry) instead of filer.ETagChunks to ensure legacy entries with Attributes.Md5 are handled consistently with the rest of the codebase. 
* s3api: optimize list logic and fix conditional header logging - Hoist bucket versioning check out of per-entry callback to avoid repeated getVersioningState calls - Extract appendOrDedup helper function to eliminate duplicate dedup/append logic across multiple code paths - Change If-Match mismatch logging from glog.Errorf to glog.V(3).Infof and remove DEBUG prefix for consistency * s3api: fix test mock to properly initialize IAM accounts Fixed nil pointer dereference in TestNewListEntryOwnerDisplayName by directly initializing the IdentityAccessManagement.accounts map in the test setup. This ensures newListEntry can properly look up account display names without panicking. * cleanup * s3api: remove premature main file cleanup in versioned uploads Removed incorrect cleanup logic that was deleting main files during versioned uploads. This was causing test failures because it deleted objects that should have been preserved as null versions when versioning was first enabled. The deduplication logic in listing is sufficient to handle duplicate entries without deleting files during upload. * s3api: add empty-value guard to getEtagFromEntry Added the same empty-value guard used in getObjectETag to prevent returning quoted empty strings. When ExtETagKey exists but is empty, the function now falls through to filer.ETag calculation instead of returning "". * s3api: fix listing of directory key objects with matching prefix Revert prefix handling logic to use strings.TrimPrefix instead of checking HasPrefix with empty string result. This ensures that when a directory key object exactly matches the prefix (e.g. prefix="dir/", object="dir/"), it is correctly handled as a regular entry instead of being skipped or incorrectly processed as a common prefix. Also fixed missing variable definition. * s3api: refactor list inline dedup to use appendOrDedup helper Refactored the inline deduplication logic in listFilerEntries to use the shared appendOrDedup helper function. 
This ensures consistent behavior and reduces code duplication. * test: fix port allocation race in s3tables integration test Updated startMiniCluster to find all required ports simultaneously using findAvailablePorts instead of sequentially. This prevents race conditions where the OS reallocates a port that was just released, causing multiple services (e.g. Filer and Volume) to be assigned the same port and fail to start.
263 lines
8.3 KiB
Go
263 lines
8.3 KiB
Go
package weed_server
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/md5"
|
|
"encoding/base64"
|
|
"fmt"
|
|
"hash"
|
|
"io"
|
|
"net/http"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
"slices"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/operation"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
// bufPool recycles *bytes.Buffer values used as scratch space while reading
// upload data. Buffers obtained from the pool may hold stale content, so
// callers must Reset them before use.
var bufPool = sync.Pool{
	New: func() any {
		return &bytes.Buffer{}
	},
}
|
|
|
|
func (fs *FilerServer) uploadRequestToChunks(ctx context.Context, w http.ResponseWriter, r *http.Request, reader io.Reader, chunkSize int32, fileName, contentType string, contentLength int64, so *operation.StorageOption) (fileChunks []*filer_pb.FileChunk, md5Hash hash.Hash, chunkOffset int64, uploadErr error, smallContent []byte) {
|
|
query := r.URL.Query()
|
|
|
|
isAppend := isAppend(r)
|
|
if query.Has("offset") {
|
|
offset := query.Get("offset")
|
|
offsetInt, err := strconv.ParseInt(offset, 10, 64)
|
|
if err != nil || offsetInt < 0 {
|
|
err = fmt.Errorf("invalid 'offset': '%s'", offset)
|
|
return nil, nil, 0, err, nil
|
|
}
|
|
if isAppend && offsetInt > 0 {
|
|
err = fmt.Errorf("cannot set offset when op=append")
|
|
return nil, nil, 0, err, nil
|
|
}
|
|
chunkOffset = offsetInt
|
|
}
|
|
|
|
return fs.uploadReaderToChunks(ctx, r, reader, chunkOffset, chunkSize, fileName, contentType, isAppend, so)
|
|
}
|
|
|
|
func (fs *FilerServer) uploadReaderToChunks(ctx context.Context, r *http.Request, reader io.Reader, startOffset int64, chunkSize int32, fileName, contentType string, isAppend bool, so *operation.StorageOption) (fileChunks []*filer_pb.FileChunk, md5Hash hash.Hash, chunkOffset int64, uploadErr error, smallContent []byte) {
|
|
|
|
md5Hash = md5.New()
|
|
chunkOffset = startOffset
|
|
var partReader = io.NopCloser(io.TeeReader(reader, md5Hash))
|
|
|
|
var wg sync.WaitGroup
|
|
var bytesBufferCounter int64 = 4
|
|
bytesBufferLimitChan := make(chan struct{}, bytesBufferCounter)
|
|
var fileChunksLock sync.Mutex
|
|
var uploadErrLock sync.Mutex
|
|
for {
|
|
|
|
// need to throttle used byte buffer
|
|
bytesBufferLimitChan <- struct{}{}
|
|
|
|
// As long as there is an error in the upload of one chunk, it can be terminated early
|
|
// uploadErr may be modified in other go routines, lock is needed to avoid race condition
|
|
uploadErrLock.Lock()
|
|
if uploadErr != nil {
|
|
<-bytesBufferLimitChan
|
|
uploadErrLock.Unlock()
|
|
break
|
|
}
|
|
uploadErrLock.Unlock()
|
|
|
|
bytesBuffer := bufPool.Get().(*bytes.Buffer)
|
|
|
|
limitedReader := io.LimitReader(partReader, int64(chunkSize))
|
|
|
|
bytesBuffer.Reset()
|
|
|
|
dataSize, err := bytesBuffer.ReadFrom(limitedReader)
|
|
|
|
// data, err := io.ReadAll(limitedReader)
|
|
if err != nil || dataSize == 0 {
|
|
bufPool.Put(bytesBuffer)
|
|
<-bytesBufferLimitChan
|
|
if err != nil {
|
|
uploadErrLock.Lock()
|
|
if uploadErr == nil {
|
|
uploadErr = err
|
|
}
|
|
uploadErrLock.Unlock()
|
|
}
|
|
break
|
|
}
|
|
if chunkOffset == 0 && !isAppend {
|
|
if dataSize < fs.option.SaveToFilerLimit {
|
|
chunkOffset += dataSize
|
|
smallContent = make([]byte, dataSize)
|
|
bytesBuffer.Read(smallContent)
|
|
bufPool.Put(bytesBuffer)
|
|
<-bytesBufferLimitChan
|
|
stats.FilerHandlerCounter.WithLabelValues(stats.ContentSaveToFiler).Inc()
|
|
break
|
|
}
|
|
} else {
|
|
stats.FilerHandlerCounter.WithLabelValues(stats.AutoChunk).Inc()
|
|
}
|
|
|
|
wg.Add(1)
|
|
go func(offset int64, buf *bytes.Buffer) {
|
|
defer func() {
|
|
bufPool.Put(buf)
|
|
<-bytesBufferLimitChan
|
|
wg.Done()
|
|
}()
|
|
|
|
chunks, toChunkErr := fs.dataToChunkWithSSE(ctx, r, fileName, contentType, buf.Bytes(), offset, so)
|
|
if toChunkErr != nil {
|
|
uploadErrLock.Lock()
|
|
if uploadErr == nil {
|
|
uploadErr = toChunkErr
|
|
}
|
|
uploadErrLock.Unlock()
|
|
}
|
|
if chunks != nil {
|
|
fileChunksLock.Lock()
|
|
for _, chunk := range chunks {
|
|
fileChunks = append(fileChunks, chunk)
|
|
}
|
|
fileChunksLock.Unlock()
|
|
}
|
|
}(chunkOffset, bytesBuffer)
|
|
|
|
// reset variables for the next chunk
|
|
glog.V(4).Infof("uploadReaderToChunks read chunk at offset %d, size %d", chunkOffset, dataSize)
|
|
chunkOffset = chunkOffset + dataSize
|
|
|
|
// if last chunk was not at full chunk size, but already exhausted the reader
|
|
if dataSize < int64(chunkSize) {
|
|
break
|
|
}
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
if uploadErr != nil {
|
|
glog.V(0).InfofCtx(ctx, "upload file %s error: %v", fileName, uploadErr)
|
|
for _, chunk := range fileChunks {
|
|
glog.V(4).InfofCtx(ctx, "purging failed uploaded %s chunk %s [%d,%d)", fileName, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
|
|
}
|
|
fs.filer.DeleteUncommittedChunks(ctx, fileChunks)
|
|
return nil, md5Hash, 0, uploadErr, nil
|
|
}
|
|
slices.SortFunc(fileChunks, func(a, b *filer_pb.FileChunk) int {
|
|
return int(a.Offset - b.Offset)
|
|
})
|
|
return fileChunks, md5Hash, chunkOffset, nil, smallContent
|
|
}
|
|
|
|
func (fs *FilerServer) doUpload(ctx context.Context, urlLocation string, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt, contentMd5 string) (*operation.UploadResult, error, []byte) {
|
|
|
|
stats.FilerHandlerCounter.WithLabelValues(stats.ChunkUpload).Inc()
|
|
start := time.Now()
|
|
defer func() {
|
|
stats.FilerRequestHistogram.WithLabelValues(stats.ChunkUpload).Observe(time.Since(start).Seconds())
|
|
}()
|
|
|
|
uploadOption := &operation.UploadOption{
|
|
UploadUrl: urlLocation,
|
|
Filename: fileName,
|
|
Cipher: fs.option.Cipher,
|
|
IsInputCompressed: false,
|
|
MimeType: contentType,
|
|
PairMap: pairMap,
|
|
Jwt: auth,
|
|
Md5: contentMd5,
|
|
}
|
|
|
|
uploader, err := operation.NewUploader()
|
|
if err != nil {
|
|
return nil, err, []byte{}
|
|
}
|
|
|
|
// Use a context that ignores cancellation from the request context
|
|
uploadCtx := context.WithoutCancel(ctx)
|
|
|
|
uploadResult, err, data := uploader.Upload(uploadCtx, limitedReader, uploadOption)
|
|
if uploadResult != nil && uploadResult.RetryCount > 0 {
|
|
stats.FilerHandlerCounter.WithLabelValues(stats.ChunkUploadRetry).Add(float64(uploadResult.RetryCount))
|
|
}
|
|
return uploadResult, err, data
|
|
}
|
|
|
|
func (fs *FilerServer) dataToChunk(ctx context.Context, fileName, contentType string, data []byte, chunkOffset int64, so *operation.StorageOption) ([]*filer_pb.FileChunk, error) {
|
|
return fs.dataToChunkWithSSE(ctx, nil, fileName, contentType, data, chunkOffset, so)
|
|
}
|
|
|
|
func (fs *FilerServer) dataToChunkWithSSE(ctx context.Context, r *http.Request, fileName, contentType string, data []byte, chunkOffset int64, so *operation.StorageOption) ([]*filer_pb.FileChunk, error) {
|
|
dataReader := util.NewBytesReader(data)
|
|
|
|
// retry to assign a different file id
|
|
var fileId, urlLocation string
|
|
var auth security.EncodedJwt
|
|
var uploadErr error
|
|
var uploadResult *operation.UploadResult
|
|
var failedFileChunks []*filer_pb.FileChunk
|
|
|
|
err := util.Retry("filerDataToChunk", func() error {
|
|
// assign one file id for one chunk
|
|
fileId, urlLocation, auth, uploadErr = fs.assignNewFileInfo(ctx, so)
|
|
if uploadErr != nil {
|
|
glog.V(4).InfofCtx(ctx, "retry later due to assign error: %v", uploadErr)
|
|
stats.FilerHandlerCounter.WithLabelValues(stats.ChunkAssignRetry).Inc()
|
|
return uploadErr
|
|
}
|
|
chunkMd5 := md5.Sum(data)
|
|
chunkMd5B64 := base64.StdEncoding.EncodeToString(chunkMd5[:])
|
|
// upload the chunk to the volume server
|
|
uploadResult, uploadErr, _ = fs.doUpload(ctx, urlLocation, dataReader, fileName, contentType, nil, auth, chunkMd5B64)
|
|
if uploadErr != nil {
|
|
glog.V(4).InfofCtx(ctx, "retry later due to upload error: %v", uploadErr)
|
|
stats.FilerHandlerCounter.WithLabelValues(stats.ChunkDoUploadRetry).Inc()
|
|
fid, _ := filer_pb.ToFileIdObject(fileId)
|
|
fileChunk := filer_pb.FileChunk{
|
|
FileId: fileId,
|
|
Offset: chunkOffset,
|
|
Fid: fid,
|
|
}
|
|
failedFileChunks = append(failedFileChunks, &fileChunk)
|
|
return uploadErr
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
glog.ErrorfCtx(ctx, "upload error: %v", err)
|
|
return failedFileChunks, err
|
|
}
|
|
|
|
// if last chunk exhausted the reader exactly at the border
|
|
if uploadResult.Size == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
// Extract SSE metadata from request headers if available
|
|
var sseType filer_pb.SSEType = filer_pb.SSEType_NONE
|
|
var sseMetadata []byte
|
|
|
|
// Create chunk with SSE metadata if available
|
|
var chunk *filer_pb.FileChunk
|
|
if sseType != filer_pb.SSEType_NONE {
|
|
chunk = uploadResult.ToPbFileChunkWithSSE(fileId, chunkOffset, time.Now().UnixNano(), sseType, sseMetadata)
|
|
} else {
|
|
chunk = uploadResult.ToPbFileChunk(fileId, chunkOffset, time.Now().UnixNano())
|
|
}
|
|
|
|
return []*filer_pb.FileChunk{chunk}, nil
|
|
}
|