* Fix issue #6847: S3 chunked encoding includes headers in stored content - Add hasTrailer flag to s3ChunkedReader to track trailer presence - Update state transition logic to properly handle trailers in unsigned streaming - Enhance parseChunkChecksum to handle multiple trailer lines - Skip checksum verification for unsigned streaming uploads - Add test case for mixed format handling (unsigned headers with signed chunks) - Remove redundant CRLF reading in trailer processing This fixes the issue where chunk-signature and x-amz headers were appearing in stored file content when using chunked encoding with newer AWS SDKs. * Fix checksum validation for unsigned streaming uploads - Always validate checksum for data integrity regardless of signing - Correct checksum value in test case - Addresses PR review feedback about checksum verification * Add warning log when multiple checksum headers found in trailer - Log a warning when multiple valid checksum headers appear in trailers - Uses last checksum header as suggested by CodeRabbit reviewer - Improves debugging for edge cases with multiple checksum algorithms * Improve trailer parsing robustness in parseChunkChecksum - Remove redundant trimTrailingWhitespace call since readChunkLine already trims - Use bytes.TrimSpace for both key and value to handle whitespace around colon separator - Follows HTTP header specifications for optional whitespace around separators - Addresses Gemini Code Assist review feedback
This commit is contained in:
@@ -116,6 +116,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
|
|||||||
}
|
}
|
||||||
|
|
||||||
checkSumWriter := getCheckSumWriter(checksumAlgorithm)
|
checkSumWriter := getCheckSumWriter(checksumAlgorithm)
|
||||||
|
hasTrailer := amzTrailerHeader != ""
|
||||||
|
|
||||||
return &s3ChunkedReader{
|
return &s3ChunkedReader{
|
||||||
cred: credential,
|
cred: credential,
|
||||||
@@ -129,6 +130,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
|
|||||||
checkSumWriter: checkSumWriter,
|
checkSumWriter: checkSumWriter,
|
||||||
state: readChunkHeader,
|
state: readChunkHeader,
|
||||||
iam: iam,
|
iam: iam,
|
||||||
|
hasTrailer: hasTrailer,
|
||||||
}, s3err.ErrNone
|
}, s3err.ErrNone
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,6 +172,7 @@ type s3ChunkedReader struct {
|
|||||||
n uint64 // Unread bytes in chunk
|
n uint64 // Unread bytes in chunk
|
||||||
err error
|
err error
|
||||||
iam *IdentityAccessManagement
|
iam *IdentityAccessManagement
|
||||||
|
hasTrailer bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read chunk reads the chunk token signature portion.
|
// Read chunk reads the chunk token signature portion.
|
||||||
@@ -281,10 +284,10 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If we're using unsigned streaming upload, there is no signature to verify at each chunk.
|
// If we're using unsigned streaming upload, there is no signature to verify at each chunk.
|
||||||
if cr.chunkSignature != "" {
|
if cr.lastChunk && cr.hasTrailer {
|
||||||
cr.state = verifyChunk
|
|
||||||
} else if cr.lastChunk {
|
|
||||||
cr.state = readTrailerChunk
|
cr.state = readTrailerChunk
|
||||||
|
} else if cr.chunkSignature != "" {
|
||||||
|
cr.state = verifyChunk
|
||||||
} else {
|
} else {
|
||||||
cr.state = readChunkHeader
|
cr.state = readChunkHeader
|
||||||
}
|
}
|
||||||
@@ -304,7 +307,11 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
|
|||||||
// This implementation currently only supports the first case.
|
// This implementation currently only supports the first case.
|
||||||
// TODO: Implement the second case (signed upload with additional checksum computation for each chunk)
|
// TODO: Implement the second case (signed upload with additional checksum computation for each chunk)
|
||||||
|
|
||||||
extractedCheckSumAlgorithm, extractedChecksum := parseChunkChecksum(cr.reader)
|
extractedCheckSumAlgorithm, extractedChecksum, err := parseChunkChecksum(cr.reader)
|
||||||
|
if err != nil {
|
||||||
|
cr.err = err
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
if extractedCheckSumAlgorithm.String() != cr.checkSumAlgorithm {
|
if extractedCheckSumAlgorithm.String() != cr.checkSumAlgorithm {
|
||||||
errorMessage := fmt.Sprintf("checksum algorithm in trailer '%s' does not match the one advertised in the header '%s'", extractedCheckSumAlgorithm.String(), cr.checkSumAlgorithm)
|
errorMessage := fmt.Sprintf("checksum algorithm in trailer '%s' does not match the one advertised in the header '%s'", extractedCheckSumAlgorithm.String(), cr.checkSumAlgorithm)
|
||||||
@@ -313,6 +320,7 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
|
|||||||
return 0, cr.err
|
return 0, cr.err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate checksum for data integrity (required for both signed and unsigned streaming with trailers)
|
||||||
computedChecksum := cr.checkSumWriter.Sum(nil)
|
computedChecksum := cr.checkSumWriter.Sum(nil)
|
||||||
base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum)
|
base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum)
|
||||||
if string(extractedChecksum) != base64Checksum {
|
if string(extractedChecksum) != base64Checksum {
|
||||||
@@ -324,11 +332,6 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
|
|||||||
// TODO: Extract signature from trailer chunk and verify it.
|
// TODO: Extract signature from trailer chunk and verify it.
|
||||||
// For now, we just read the trailer chunk and discard it.
|
// For now, we just read the trailer chunk and discard it.
|
||||||
|
|
||||||
// Reading remaining CRLF.
|
|
||||||
for i := 0; i < 2; i++ {
|
|
||||||
cr.err = readCRLF(cr.reader)
|
|
||||||
}
|
|
||||||
|
|
||||||
cr.state = eofChunk
|
cr.state = eofChunk
|
||||||
|
|
||||||
case readChunk:
|
case readChunk:
|
||||||
@@ -506,41 +509,37 @@ func parseS3ChunkExtension(buf []byte) ([]byte, []byte) {
|
|||||||
return buf[:semi], parseChunkSignature(buf[semi:])
|
return buf[:semi], parseChunkSignature(buf[semi:])
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte) {
|
func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte, error) {
|
||||||
// When using unsigned upload, this would be the raw contents of the trailer chunk:
|
// Read trailer lines until empty line
|
||||||
//
|
var checksumAlgorithm ChecksumAlgorithm
|
||||||
// x-amz-checksum-crc32:YABb/g==\n\r\n\r\n // Trailer chunk (note optional \n character)
|
var checksum []byte
|
||||||
// \r\n // CRLF
|
|
||||||
//
|
|
||||||
// When using signed upload with an additional checksum algorithm, this would be the raw contents of the trailer chunk:
|
|
||||||
//
|
|
||||||
// x-amz-checksum-crc32:YABb/g==\n\r\n // Trailer chunk (note optional \n character)
|
|
||||||
// trailer-signature\r\n
|
|
||||||
// \r\n // CRLF
|
|
||||||
//
|
|
||||||
|
|
||||||
// x-amz-checksum-crc32:YABb/g==\n
|
for {
|
||||||
bytesRead, err := readChunkLine(b)
|
bytesRead, err := readChunkLine(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ChecksumAlgorithmNone, nil
|
return ChecksumAlgorithmNone, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(bytesRead) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
parts := bytes.SplitN(bytesRead, []byte(":"), 2)
|
||||||
|
if len(parts) == 2 {
|
||||||
|
key := string(bytes.TrimSpace(parts[0]))
|
||||||
|
value := bytes.TrimSpace(parts[1])
|
||||||
|
if alg, err := extractChecksumAlgorithm(key); err == nil {
|
||||||
|
if checksumAlgorithm != ChecksumAlgorithmNone {
|
||||||
|
glog.V(3).Infof("multiple checksum headers found in trailer, using last: %s", key)
|
||||||
|
}
|
||||||
|
checksumAlgorithm = alg
|
||||||
|
checksum = value
|
||||||
|
}
|
||||||
|
// Ignore other trailer headers like x-amz-trailer-signature
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split on ':'
|
return checksumAlgorithm, checksum, nil
|
||||||
parts := bytes.SplitN(bytesRead, []byte(":"), 2)
|
|
||||||
checksumKey := string(parts[0])
|
|
||||||
checksumValue := parts[1]
|
|
||||||
|
|
||||||
// Discard all trailing whitespace characters
|
|
||||||
checksumValue = trimTrailingWhitespace(checksumValue)
|
|
||||||
|
|
||||||
// If the checksum key is not a supported checksum algorithm, return an error.
|
|
||||||
// TODO: Bubble that error up to the caller
|
|
||||||
extractedAlgorithm, err := extractChecksumAlgorithm(checksumKey)
|
|
||||||
if err != nil {
|
|
||||||
return ChecksumAlgorithmNone, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return extractedAlgorithm, checksumValue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseChunkSignature(chunk []byte) []byte {
|
func parseChunkSignature(chunk []byte) []byte {
|
||||||
|
|||||||
Reference in New Issue
Block a user