fix(s3): include directory markers in ListObjects without delimiter (#8704)
* fix(s3): include directory markers in ListObjects without delimiter (#8698) Directory key objects (zero-byte objects with keys ending in "/") created via PutObject were omitted from ListObjects/ListObjectsV2 results when no delimiter was specified. AWS S3 includes these as regular keys in Contents. The issue was in doListFilerEntries: when recursing into directories in non-delimiter mode, directory key objects were only emitted when prefixEndsOnDelimiter was true. Added an else branch to emit them in the general recursive case as well. * remove issue reference from inline comment * test: add child-under-marker and paginated listing coverage Extend test 6 to place a child object under the directory marker and paginate with MaxKeys=1 so the emit-then-recurse truncation path is exercised. * fix(test): skip directory markers in Spark temporary artifacts check The listing check now correctly shows directory markers (keys ending in "/") after the ListObjects fix. These 0-byte metadata objects are not data artifacts — filter them from the listing check since the HeadObject-based check already verifies their cleanup with a timeout.
This commit is contained in:
244
test/s3/normal/s3_list_empty_directory_test.go
Normal file
244
test/s3/normal/s3_list_empty_directory_test.go
Normal file
@@ -0,0 +1,244 @@
|
||||
package example
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/aws/aws-sdk-go/aws"
|
||||
"github.com/aws/aws-sdk-go/service/s3"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestS3ListObjectsEmptyDirectoryMarkers reproduces GitHub issue #8698:
|
||||
// S3 API ListObjects does not include empty directory markers (zero-byte
|
||||
// objects with keys ending in "/") created via PutObject.
|
||||
//
|
||||
// AWS S3 includes these markers as regular keys in Contents. SeaweedFS
|
||||
// was filtering them out during recursive directory listing.
|
||||
func TestS3ListObjectsEmptyDirectoryMarkers(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
|
||||
cluster, err := startMiniCluster(t)
|
||||
require.NoError(t, err)
|
||||
defer cluster.Stop()
|
||||
|
||||
bucketName := createTestBucket(t, cluster, "test-empty-dirs-")
|
||||
|
||||
// Create a regular file at Empty/manifest.yaml
|
||||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(bucketName),
|
||||
Key: aws.String("Empty/manifest.yaml"),
|
||||
Body: bytes.NewReader([]byte("name: test\nversion: 1.0\n")),
|
||||
})
|
||||
require.NoError(t, err, "failed to create regular file")
|
||||
|
||||
// Create an empty directory marker via PutObject (key ending in "/", zero bytes).
|
||||
// This is how `aws s3api put-object --key "Empty/empty/"` works.
|
||||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(bucketName),
|
||||
Key: aws.String("Empty/empty/"),
|
||||
Body: bytes.NewReader([]byte{}),
|
||||
})
|
||||
require.NoError(t, err, "failed to create empty directory marker")
|
||||
|
||||
// Verify the directory marker exists via HeadObject
|
||||
_, err = cluster.s3Client.HeadObject(&s3.HeadObjectInput{
|
||||
Bucket: aws.String(bucketName),
|
||||
Key: aws.String("Empty/empty/"),
|
||||
})
|
||||
require.NoError(t, err, "directory marker should exist via HeadObject")
|
||||
|
||||
// Test 1: ListObjectsV2 with prefix (no delimiter) — the exact scenario from the issue.
|
||||
// AWS S3 returns both "Empty/empty/" and "Empty/manifest.yaml" in Contents.
|
||||
t.Run("ListV2_WithPrefix_NoDelimiter", func(t *testing.T) {
|
||||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(bucketName),
|
||||
Prefix: aws.String("Empty"),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
keys := collectKeys(resp.Contents)
|
||||
sort.Strings(keys)
|
||||
|
||||
assert.Equal(t, []string{"Empty/empty/", "Empty/manifest.yaml"}, keys,
|
||||
"both the directory marker and regular file should be listed")
|
||||
})
|
||||
|
||||
// Test 2: ListObjectsV1 with prefix (no delimiter).
|
||||
t.Run("ListV1_WithPrefix_NoDelimiter", func(t *testing.T) {
|
||||
resp, err := cluster.s3Client.ListObjects(&s3.ListObjectsInput{
|
||||
Bucket: aws.String(bucketName),
|
||||
Prefix: aws.String("Empty"),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
keys := collectKeysV1(resp.Contents)
|
||||
sort.Strings(keys)
|
||||
|
||||
assert.Equal(t, []string{"Empty/empty/", "Empty/manifest.yaml"}, keys,
|
||||
"both the directory marker and regular file should be listed")
|
||||
})
|
||||
|
||||
// Test 3: ListObjectsV2 without prefix and without delimiter.
|
||||
// All objects in the bucket should appear, including directory markers.
|
||||
t.Run("ListV2_NoPrefix_NoDelimiter", func(t *testing.T) {
|
||||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(bucketName),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
keys := collectKeys(resp.Contents)
|
||||
|
||||
assert.Contains(t, keys, "Empty/empty/", "directory marker should appear in listing")
|
||||
assert.Contains(t, keys, "Empty/manifest.yaml", "regular file should appear in listing")
|
||||
})
|
||||
|
||||
// Test 4: ListObjectsV2 with prefix "Empty/" and delimiter "/".
|
||||
// The directory marker "Empty/empty/" should appear as a CommonPrefix,
|
||||
// and "Empty/manifest.yaml" should appear in Contents.
|
||||
t.Run("ListV2_WithPrefix_WithDelimiter", func(t *testing.T) {
|
||||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(bucketName),
|
||||
Prefix: aws.String("Empty/"),
|
||||
Delimiter: aws.String("/"),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
keys := collectKeys(resp.Contents)
|
||||
prefixes := collectPrefixes(resp.CommonPrefixes)
|
||||
|
||||
assert.Contains(t, keys, "Empty/manifest.yaml", "regular file should be in Contents")
|
||||
assert.Contains(t, prefixes, "Empty/empty/", "directory marker should appear as CommonPrefix")
|
||||
})
|
||||
|
||||
// Test 5: Multiple empty directory markers at different nesting levels.
|
||||
t.Run("ListV2_NestedEmptyDirs", func(t *testing.T) {
|
||||
nestedBucket := createTestBucket(t, cluster, "test-nested-dirs-")
|
||||
|
||||
// Create nested structure:
|
||||
// dir/sub1/ (empty dir marker)
|
||||
// dir/sub2/deep/ (empty dir marker at deeper level)
|
||||
// dir/file.txt (regular file)
|
||||
_, err := cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(nestedBucket),
|
||||
Key: aws.String("dir/sub1/"),
|
||||
Body: bytes.NewReader([]byte{}),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(nestedBucket),
|
||||
Key: aws.String("dir/sub2/deep/"),
|
||||
Body: bytes.NewReader([]byte{}),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(nestedBucket),
|
||||
Key: aws.String("dir/file.txt"),
|
||||
Body: bytes.NewReader([]byte("content")),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(nestedBucket),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
keys := collectKeys(resp.Contents)
|
||||
sort.Strings(keys)
|
||||
|
||||
assert.Equal(t, []string{"dir/file.txt", "dir/sub1/", "dir/sub2/deep/"}, keys,
|
||||
"all objects including nested empty directory markers should be listed")
|
||||
})
|
||||
|
||||
// Test 6: Directory marker with a child object, paginated with MaxKeys=1.
|
||||
// This exercises the emit-marker-then-recurse path under truncation:
|
||||
// the marker is emitted, maxKeys drops to 0, and the child must be
|
||||
// picked up on a subsequent page.
|
||||
t.Run("ListV2_MarkerWithChild_Paginated", func(t *testing.T) {
|
||||
siblingBucket := createTestBucket(t, cluster, "test-sibling-dirs-")
|
||||
|
||||
// Create:
|
||||
// docs/ (directory key object)
|
||||
// docs/readme.txt (child file inside the marker)
|
||||
// readme.txt (sibling file at top level)
|
||||
_, err := cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(siblingBucket),
|
||||
Key: aws.String("docs/"),
|
||||
Body: bytes.NewReader([]byte{}),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(siblingBucket),
|
||||
Key: aws.String("docs/readme.txt"),
|
||||
Body: bytes.NewReader([]byte("inside docs")),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
|
||||
Bucket: aws.String(siblingBucket),
|
||||
Key: aws.String("readme.txt"),
|
||||
Body: bytes.NewReader([]byte("hello")),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Unpaginated: all three keys should appear.
|
||||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(siblingBucket),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
keys := collectKeys(resp.Contents)
|
||||
sort.Strings(keys)
|
||||
|
||||
assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, keys,
|
||||
"directory marker, its child, and sibling file should all be listed")
|
||||
|
||||
// Paginated with MaxKeys=1: collect all keys across pages.
|
||||
var allKeys []string
|
||||
var token *string
|
||||
for {
|
||||
pageResp, pageErr := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(siblingBucket),
|
||||
MaxKeys: aws.Int64(1),
|
||||
ContinuationToken: token,
|
||||
})
|
||||
require.NoError(t, pageErr)
|
||||
allKeys = append(allKeys, collectKeys(pageResp.Contents)...)
|
||||
if !aws.BoolValue(pageResp.IsTruncated) {
|
||||
break
|
||||
}
|
||||
token = pageResp.NextContinuationToken
|
||||
}
|
||||
sort.Strings(allKeys)
|
||||
|
||||
assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, allKeys,
|
||||
"paginated listing should return all keys including marker and its child")
|
||||
})
|
||||
}
|
||||
|
||||
func collectKeys(contents []*s3.Object) []string {
|
||||
keys := make([]string, 0, len(contents))
|
||||
for _, obj := range contents {
|
||||
keys = append(keys, aws.StringValue(obj.Key))
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
func collectKeysV1(contents []*s3.Object) []string {
|
||||
return collectKeys(contents)
|
||||
}
|
||||
|
||||
func collectPrefixes(prefixes []*s3.CommonPrefix) []string {
|
||||
result := make([]string, 0, len(prefixes))
|
||||
for _, p := range prefixes {
|
||||
result = append(result, aws.StringValue(p.Prefix))
|
||||
}
|
||||
return result
|
||||
}
|
||||
@@ -50,6 +50,12 @@ print("WRITE_COUNT=" + str(count))
|
||||
keys := listObjectKeysByPrefix(t, env, "test", "issue-8285/")
|
||||
var temporaryKeys []string
|
||||
for _, key := range keys {
|
||||
// Skip directory markers (keys ending in "/") — these are 0-byte
|
||||
// metadata objects, not data artifacts. They are verified separately
|
||||
// via HeadObject with a timeout below.
|
||||
if strings.HasSuffix(key, "/") {
|
||||
continue
|
||||
}
|
||||
if hasTemporaryPathSegment(key) {
|
||||
temporaryKeys = append(temporaryKeys, key)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user