s3: use url.PathUnescape for X-Amz-Copy-Source header (#8545)

* s3: use url.PathUnescape for X-Amz-Copy-Source header (#8544)

The X-Amz-Copy-Source header is a URL-encoded path, not a query string.
Using url.QueryUnescape incorrectly converts literal '+' characters to
spaces, which can cause object key mismatches during copy operations.

Switch to url.PathUnescape in CopyObjectHandler, CopyObjectPartHandler,
and pathToBucketObjectAndVersion to correctly handle special characters
like '!', '+', and other RFC 3986 sub-delimiters that S3 clients may
percent-encode (e.g. '!' as %21).

* s3: add path validation to CopyObjectPartHandler

CopyObjectPartHandler was missing the validateTableBucketObjectPath
checks that CopyObjectHandler has, allowing potential path traversal
in the source bucket/object of copy part requests.

* s3: fix case-sensitive HeadersRegexp for copy source routing

The HeadersRegexp for X-Amz-Copy-Source used `%2F`, which only matched
the uppercase hex encoding. RFC 3986 allows both `%2F` and `%2f`, so
clients sending lowercase percent-encoding would bypass the copy
handler and hit PutObjectHandler instead. Add the (?i) flag for
case-insensitive matching.

Also add test coverage for the versionId branch in
pathToBucketObjectAndVersion and for lowercase %2f routing.
This commit is contained in:
Chris Lu
2026-03-07 11:10:02 -08:00
committed by GitHub
parent 3f946fc0c0
commit d89eb8267f
3 changed files with 255 additions and 6 deletions

View File

@@ -38,7 +38,9 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
// Copy source path.
rawCopySource := r.Header.Get("X-Amz-Copy-Source")
cpSrcPath, err := url.QueryUnescape(rawCopySource)
// Use PathUnescape (not QueryUnescape) because the copy source is a path,
// not a query string. QueryUnescape would incorrectly convert '+' to space.
cpSrcPath, err := url.PathUnescape(rawCopySource)
if err != nil {
// Save unescaped string as is.
cpSrcPath = rawCopySource
@@ -438,7 +440,7 @@ func pathToBucketObjectAndVersion(rawPath, decodedPath string) (bucket, object,
versionId = values.Get("versionId")
rawPathNoQuery := rawPath[:idx]
if unescaped, err := url.QueryUnescape(rawPathNoQuery); err == nil {
if unescaped, err := url.PathUnescape(rawPathNoQuery); err == nil {
pathForBucket = unescaped
} else {
pathForBucket = rawPathNoQuery
@@ -470,8 +472,9 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
glog.V(4).Infof("CopyObjectPart: Raw copy source header=%q", rawCopySource)
// Try URL unescaping - AWS SDK sends URL-encoded copy sources
cpSrcPath, err := url.QueryUnescape(rawCopySource)
// Use PathUnescape (not QueryUnescape) because the copy source is a path,
// not a query string. QueryUnescape would incorrectly convert '+' to space.
cpSrcPath, err := url.PathUnescape(rawCopySource)
if err != nil {
// If unescaping fails, log and use original
glog.V(4).Infof("CopyObjectPart: Failed to unescape copy source %q: %v, using as-is", rawCopySource, err)
@@ -483,6 +486,17 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
glog.V(4).Infof("CopyObjectPart: Parsed srcBucket=%q, srcObject=%q, srcVersionId=%q",
srcBucket, srcObject, srcVersionId)
if err := s3a.validateTableBucketObjectPath(dstBucket, dstObject); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied)
return
}
if srcBucket != "" && srcBucket != dstBucket {
if err := s3a.validateTableBucketObjectPath(srcBucket, srcObject); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied)
return
}
}
// If source object is empty or bucket is empty, reply back invalid copy source.
// Note: srcObject can be "/" for root-level objects, but empty string means parsing failed
if srcObject == "" || srcBucket == "" {