s3: use url.PathUnescape for X-Amz-Copy-Source header (#8545)

* s3: use url.PathUnescape for X-Amz-Copy-Source header (#8544)

The X-Amz-Copy-Source header is a URL-encoded path, not a query string.
Using url.QueryUnescape incorrectly converts literal '+' characters to
spaces, which can cause object key mismatches during copy operations.

Switch to url.PathUnescape in CopyObjectHandler, CopyObjectPartHandler,
and pathToBucketObjectAndVersion to correctly handle special characters
like '!', '+', and other RFC 3986 sub-delimiters that S3 clients may
percent-encode (e.g. '!' as %21).

* s3: add path validation to CopyObjectPartHandler

CopyObjectPartHandler was missing the validateTableBucketObjectPath
checks that CopyObjectHandler has, allowing potential path traversal
in the source bucket/object of copy part requests.

* s3: fix case-sensitive HeadersRegexp for copy source routing

The HeadersRegexp for X-Amz-Copy-Source used `%2F`, which only matched
the uppercase hex encoding. RFC 3986 allows both `%2F` and `%2f`, so
clients sending lowercase percent-encoding would bypass the copy
handler and hit PutObjectHandler instead. Add the (?i) flag for
case-insensitive matching.

Also add test coverage for the versionId branch in
pathToBucketObjectAndVersion and for lowercase %2f routing.
This commit is contained in:
Chris Lu
2026-03-07 11:10:02 -08:00
committed by GitHub
parent 3f946fc0c0
commit d89eb8267f
3 changed files with 255 additions and 6 deletions

View File

@@ -38,7 +38,9 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
// Copy source path.
rawCopySource := r.Header.Get("X-Amz-Copy-Source")
cpSrcPath, err := url.QueryUnescape(rawCopySource)
// Use PathUnescape (not QueryUnescape) because the copy source is a path,
// not a query string. QueryUnescape would incorrectly convert '+' to space.
cpSrcPath, err := url.PathUnescape(rawCopySource)
if err != nil {
// Save unescaped string as is.
cpSrcPath = rawCopySource
@@ -438,7 +440,7 @@ func pathToBucketObjectAndVersion(rawPath, decodedPath string) (bucket, object,
versionId = values.Get("versionId")
rawPathNoQuery := rawPath[:idx]
if unescaped, err := url.QueryUnescape(rawPathNoQuery); err == nil {
if unescaped, err := url.PathUnescape(rawPathNoQuery); err == nil {
pathForBucket = unescaped
} else {
pathForBucket = rawPathNoQuery
@@ -470,8 +472,9 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
glog.V(4).Infof("CopyObjectPart: Raw copy source header=%q", rawCopySource)
// Try URL unescaping - AWS SDK sends URL-encoded copy sources
cpSrcPath, err := url.QueryUnescape(rawCopySource)
// Use PathUnescape (not QueryUnescape) because the copy source is a path,
// not a query string. QueryUnescape would incorrectly convert '+' to space.
cpSrcPath, err := url.PathUnescape(rawCopySource)
if err != nil {
// If unescaping fails, log and use original
glog.V(4).Infof("CopyObjectPart: Failed to unescape copy source %q: %v, using as-is", rawCopySource, err)
@@ -483,6 +486,17 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
glog.V(4).Infof("CopyObjectPart: Parsed srcBucket=%q, srcObject=%q, srcVersionId=%q",
srcBucket, srcObject, srcVersionId)
if err := s3a.validateTableBucketObjectPath(dstBucket, dstObject); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied)
return
}
if srcBucket != "" && srcBucket != dstBucket {
if err := s3a.validateTableBucketObjectPath(srcBucket, srcObject); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied)
return
}
}
// If source object is empty or bucket is empty, reply back invalid copy source.
// Note: srcObject can be "/" for root-level objects, but empty string means parsing failed
if srcObject == "" || srcBucket == "" {