Add MD5 checksum validation support on PutObject and UploadPart (#8367)

* Add MD5 checksum validation support on PutObject and UploadPart

Per the S3 specification, when a client sends a Content-MD5 header, the server must compare it against the MD5 of the received body and return BadDigest (HTTP 400) if they don't match.

SeaweedFS was silently accepting objects with incorrect Content-MD5 headers, which breaks data integrity verification for clients that rely on this feature (e.g. boto3). The error infrastructure (ErrBadDigest, ErrMsgBadDigest) already existed from PR #7306 but was never wired to an actual check.

This commit adds MD5 verification in putToFiler after the body is streamed and the MD5 is computed, and adds Content-MD5 header validation to PutObjectPartHandler (matching PutObjectHandler). Orphaned chunks are cleaned up on mismatch.

Refs: https://github.com/seaweedfs/seaweedfs/discussions/3908

* handle SSE, add uploadpart test

* s3 integration test: fix typo and add multipart upload checksum test

* s3api: move validateContentMd5 after GetBucketAndObject in PutObjectPartHandler

* s3api: move validateContentMd5 after GetBucketAndObject in PutObjectHandler

* s3api: fix MD5 validation for SSE uploads and logging in putToFiler

* add SSE test with checksum validation - mostly ai-generated

* Update s3_integration_test.go

* Address S3 integration test feedback: fix typos, rename variables, add verification steps, and clean up comments.

---------

Co-authored-by: Chris Lu <chris.lu@gmail.com>
This commit is contained in:
Michał Szynkiewicz
2026-02-19 00:40:08 +01:00
committed by GitHub
parent 6a3a97333f
commit 53048ffffb
3 changed files with 326 additions and 12 deletions

View File

@@ -316,7 +316,11 @@ func (s3a *S3ApiServer) ListObjectPartsHandler(w http.ResponseWriter, r *http.Re
// PutObjectPartHandler - Put an object part in a multipart upload.
func (s3a *S3ApiServer) PutObjectPartHandler(w http.ResponseWriter, r *http.Request) {
bucket, object := s3_constants.GetBucketAndObject(r)
_, err := validateContentMd5(r.Header)
if err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrInvalidDigest)
return
}
// Check if bucket exists before putting object part
if err := s3a.checkBucket(r, bucket); err != s3err.ErrNone {
s3err.WriteErrorResponse(w, r, err)
@@ -326,7 +330,7 @@ func (s3a *S3ApiServer) PutObjectPartHandler(w http.ResponseWriter, r *http.Requ
uploadID := r.URL.Query().Get("uploadId")
// validateTableBucketObjectPath is enforced at multipart initiation. checkUploadId
// cryptographically binds uploadID to object path, so parts cannot switch paths.
err := s3a.checkUploadId(object, uploadID)
err = s3a.checkUploadId(object, uploadID)
if err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchUpload)
return

View File

@@ -1,7 +1,9 @@
package s3api
import (
"bytes"
"context"
"crypto/md5"
"encoding/base64"
"encoding/json"
"errors"
@@ -72,21 +74,19 @@ type SSEResponseMetadata struct {
}
func (s3a *S3ApiServer) PutObjectHandler(w http.ResponseWriter, r *http.Request) {
// http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html
bucket, object := s3_constants.GetBucketAndObject(r)
glog.V(2).Infof("PutObjectHandler bucket=%s object=%s size=%d", bucket, object, r.ContentLength)
if err := s3a.validateTableBucketObjectPath(bucket, object); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied)
return
}
_, err := validateContentMd5(r.Header)
if err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrInvalidDigest)
return
}
glog.V(2).Infof("PutObjectHandler bucket=%s object=%s size=%d", bucket, object, r.ContentLength)
if err := s3a.validateTableBucketObjectPath(bucket, object); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied)
return
}
// Check conditional headers
if errCode := s3a.checkConditionalHeaders(r, bucket, object); errCode != s3err.ErrNone {
@@ -288,11 +288,13 @@ func (s3a *S3ApiServer) putToFiler(r *http.Request, filePath string, dataReader
// NEW OPTIMIZATION: Write directly to volume servers, bypassing filer proxy
// This eliminates the filer proxy overhead for PUT operations
// Note: filePath is now passed directly instead of URL (no parsing needed)
// For SSE, encrypt with offset=0 for all parts
// Each part is encrypted independently, then decrypted using metadata during GET
partOffset := int64(0)
plaintextHash := md5.New()
dataReader = io.TeeReader(dataReader, plaintextHash)
// Handle all SSE encryption types in a unified manner
sseResult, sseErrorCode := s3a.handleAllSSEEncryption(r, dataReader, partOffset)
if sseErrorCode != s3err.ErrNone {
@@ -426,8 +428,21 @@ func (s3a *S3ApiServer) putToFiler(r *http.Request, filePath string, dataReader
}
// Step 3: Calculate MD5 hash and add SSE metadata to chunks
md5Sum := chunkResult.Md5Hash.Sum(nil)
md5Sum := plaintextHash.Sum(nil)
contentMd5 := r.Header.Get("Content-Md5")
if contentMd5 != "" {
expectedMd5, err := base64.StdEncoding.DecodeString(contentMd5)
if err != nil {
glog.Errorf("putToFiler: Invalid Content-Md5 header: %v, attempting to cleanup %d orphaned chunks", err, len(chunkResult.FileChunks))
s3a.deleteOrphanedChunks(chunkResult.FileChunks)
return "", s3err.ErrInvalidDigest, SSEResponseMetadata{}
}
if !bytes.Equal(md5Sum, expectedMd5) {
glog.Warningf("putToFiler: Checksum verification failed, attempting to cleanup %d orphaned chunks", len(chunkResult.FileChunks))
s3a.deleteOrphanedChunks(chunkResult.FileChunks)
return "", s3err.ErrBadDigest, SSEResponseMetadata{}
}
}
glog.V(4).Infof("putToFiler: Chunked upload SUCCESS - path=%s, chunks=%d, size=%d",
filePath, len(chunkResult.FileChunks), chunkResult.TotalSize)