From d6a872c4b9337db4a4c9c6cdb1277ee5e472377a Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 21 Mar 2026 19:31:56 -0700 Subject: [PATCH] Preserve explicit directory markers with octet-stream MIME (#8726) * Preserve octet-stream MIME on explicit directory markers * Run empty directory marker regression in CI * Run S3 Spark workflow for filer changes --- .../s3-example-integration-tests.yml | 8 +++ .github/workflows/s3-spark-tests.yml | 1 + .../s3/normal/s3_list_empty_directory_test.go | 36 ++++++++++ weed/filer/filerstore_wrapper.go | 17 +++-- weed/filer/filerstore_wrapper_test.go | 71 +++++++++++++++++++ 5 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 weed/filer/filerstore_wrapper_test.go diff --git a/.github/workflows/s3-example-integration-tests.yml b/.github/workflows/s3-example-integration-tests.yml index cbce56fa4..447e362ec 100644 --- a/.github/workflows/s3-example-integration-tests.yml +++ b/.github/workflows/s3-example-integration-tests.yml @@ -47,6 +47,14 @@ jobs: echo "=== Running S3 DeleteBucketNotEmpty Tests ===" go test -v -timeout=60s -run TestS3DeleteBucketNotEmpty ./... + - name: Run S3 Empty Directory Marker Tests + timeout-minutes: 15 + working-directory: test/s3/normal + run: | + set -x + echo "=== Running S3 Empty Directory Marker Tests ===" + go test -v -timeout=180s -run TestS3ListObjectsEmptyDirectoryMarkers ./... + - name: Run IAM Integration Tests timeout-minutes: 15 working-directory: test/s3/normal diff --git a/.github/workflows/s3-spark-tests.yml b/.github/workflows/s3-spark-tests.yml index 6a3623d0a..106ee93c5 100644 --- a/.github/workflows/s3-spark-tests.yml +++ b/.github/workflows/s3-spark-tests.yml @@ -4,6 +4,7 @@ on: pull_request: paths: - 'weed/s3api/**' + - 'weed/filer/**' - 'test/s3/spark/**' - 'test/s3tables/testutil/**' - '.github/workflows/s3-spark-tests.yml' diff --git a/test/s3/normal/s3_list_empty_directory_test.go b/test/s3/normal/s3_list_empty_directory_test.go index b8c64180f..20152e71e 100644 --- a/test/s3/normal/s3_list_empty_directory_test.go +++ b/test/s3/normal/s3_list_empty_directory_test.go @@ -221,6 +221,42 @@ func TestS3ListObjectsEmptyDirectoryMarkers(t *testing.T) { assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, allKeys, "paginated listing should return all keys including marker and its child") }) + + t.Run("ListExplicitDirectoryMarkerWithOctetStreamContentType", func(t *testing.T) { + bucketWithContentType := createTestBucket(t, cluster, "test-explicit-dir-") + objectKey := "test-content/empty/" + + _, err := cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(bucketWithContentType), + Key: aws.String(objectKey), + Body: bytes.NewReader([]byte{}), + ContentType: aws.String("application/octet-stream"), + }) + require.NoError(t, err, "failed to create explicit directory marker with octet-stream content type") + + headResp, err := cluster.s3Client.HeadObject(&s3.HeadObjectInput{ + Bucket: aws.String(bucketWithContentType), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "directory marker should exist via HeadObject") + assert.Equal(t, "application/octet-stream", aws.StringValue(headResp.ContentType)) + + respV1, err := cluster.s3Client.ListObjects(&s3.ListObjectsInput{ + Bucket: aws.String(bucketWithContentType), + Prefix: aws.String("test-content"), + }) + require.NoError(t, err) + assert.Equal(t, []string{objectKey}, collectKeysV1(respV1.Contents), + "explicit empty directory markers with octet-stream content type should be listed") + + respV2, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketWithContentType), + Prefix: aws.String("test-content"), + }) + require.NoError(t, err) + assert.Equal(t, []string{objectKey}, collectKeys(respV2.Contents), + "explicit empty directory markers with octet-stream content type should be listed") + }) } func collectKeys(contents []*s3.Object) []string { diff --git a/weed/filer/filerstore_wrapper.go b/weed/filer/filerstore_wrapper.go index 485f3c9ea..0674c6d79 100644 --- a/weed/filer/filerstore_wrapper.go +++ b/weed/filer/filerstore_wrapper.go @@ -128,9 +128,7 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err }() filer_pb.BeforeEntrySerialization(entry.GetChunks()) - if entry.Mime == "application/octet-stream" { - entry.Mime = "" - } + normalizeEntryMimeForStore(entry) if err := fsw.handleUpdateToHardLinks(ctx, entry); err != nil { return err @@ -150,9 +148,7 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err }() filer_pb.BeforeEntrySerialization(entry.GetChunks()) - if entry.Mime == "application/octet-stream" { - entry.Mime = "" - } + normalizeEntryMimeForStore(entry) if err := fsw.handleUpdateToHardLinks(ctx, entry); err != nil { return err @@ -162,6 +158,15 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err return actualStore.UpdateEntry(ctx, entry) } +func normalizeEntryMimeForStore(entry *Entry) { + if entry == nil { + return + } + if !entry.IsDirectory() && entry.Mime == "application/octet-stream" { + entry.Mime = "" + } +} + func (fsw *FilerStoreWrapper) FindEntry(ctx context.Context, fp util.FullPath) (entry *Entry, err error) { ctx = context.WithoutCancel(ctx) actualStore := fsw.getActualStore(fp) diff --git a/weed/filer/filerstore_wrapper_test.go b/weed/filer/filerstore_wrapper_test.go new file mode 100644 index 000000000..c3ef32018 --- /dev/null +++ b/weed/filer/filerstore_wrapper_test.go @@ -0,0 +1,71 @@ +package filer + +import ( + "context" + "os" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/util" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFilerStoreWrapperMimeNormalization(t *testing.T) { + tests := []struct { + name string + mode os.FileMode + wantMime string + }{ + { + name: "files strip octet-stream", + mode: 0o660, + wantMime: "", + }, + { + name: "directories keep octet-stream", + mode: os.ModeDir | 0o770, + wantMime: "application/octet-stream", + }, + } + + operations := []struct { + name string + run func(*FilerStoreWrapper, context.Context, *Entry) error + }{ + { + name: "insert", + run: func(fsw *FilerStoreWrapper, ctx context.Context, entry *Entry) error { + return fsw.InsertEntry(ctx, entry) + }, + }, + { + name: "update", + run: func(fsw *FilerStoreWrapper, ctx context.Context, entry *Entry) error { + return fsw.UpdateEntry(ctx, entry) + }, + }, + } + + for _, tt := range tests { + for _, op := range operations { + t.Run(tt.name+"/"+op.name, func(t *testing.T) { + store := newStubFilerStore() + wrapper := NewFilerStoreWrapper(store) + entry := &Entry{ + FullPath: util.FullPath("/buckets/test/object"), + Attr: Attr{ + Mode: tt.mode, + Mime: "application/octet-stream", + }, + } + + err := op.run(wrapper, context.Background(), entry) + require.NoError(t, err) + + storedEntry, findErr := store.FindEntry(context.Background(), entry.FullPath) + require.NoError(t, findErr) + assert.Equal(t, tt.wantMime, storedEntry.Mime) + }) + } + } +}