s3api: make conditional mutations atomic and AWS-compatible (#8802)

* s3api: serialize conditional write finalization

* s3api: add conditional delete mutation checks

* s3api: enforce destination conditions for copy

* s3api: revalidate multipart completion under lock

* s3api: rollback failed put finalization hooks

* s3api: report delete-marker version deletions

* s3api: fix copy destination versioning edge cases

* s3api: make versioned multipart completion idempotent

* test/s3: cover conditional mutation regressions

* s3api: rollback failed copy version finalization

* s3api: resolve suspended delete conditions via latest entry

* s3api: remove copy test null-version injection

* s3api: reject out-of-order multipart completions

* s3api: preserve multipart replay version metadata

* s3api: surface copy destination existence errors

* s3api: simplify delete condition target resolution

* test/s3: make conditional delete assertions order independent

* test/s3: add distributed lock gateway integration

* s3api: fail closed multipart versioned completion

* s3api: harden copy metadata and overwrite paths

* s3api: create delete markers for suspended deletes

* s3api: allow duplicate multipart completion parts
Chris Lu
2026-03-27 19:22:26 -07:00
committed by GitHub
parent bf2a2d2538
commit 0adb78bc6b
19 changed files with 2545 additions and 688 deletions
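The behavior these changes target is standard AWS conditional-mutation semantics: a PutObject with If-None-Match: * must succeed for exactly one writer and fail with 412 PreconditionFailed for everyone else, even when concurrent requests land on different S3 gateways, and conditional deletes are checked against the current version under the same serialization. A minimal client-side sketch of that contract (assuming an aws-sdk-go-v2 client pointed at a SeaweedFS S3 gateway; the endpoint, bucket, key, and credentials below are placeholders, not values from this change):

package main

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"log"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/smithy-go"
)

func main() {
	// Placeholder endpoint and credentials; substitute the gateway address and
	// identity configured for your deployment.
	cfg, err := config.LoadDefaultConfig(context.Background(),
		config.WithRegion("us-east-1"),
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
			"some_access_key1", "some_secret_key1", "")),
	)
	if err != nil {
		log.Fatal(err)
	}
	client := s3.NewFromConfig(cfg, func(o *s3.Options) {
		o.BaseEndpoint = aws.String("http://127.0.0.1:8333")
		o.UsePathStyle = true
	})

	// Create-if-absent: only the first writer for this key should succeed;
	// every other concurrent attempt should see 412 PreconditionFailed.
	_, err = client.PutObject(context.Background(), &s3.PutObjectInput{
		Bucket:      aws.String("example-bucket"),
		Key:         aws.String("example-object"),
		IfNoneMatch: aws.String("*"),
		Body:        bytes.NewReader([]byte("first write wins")),
	})
	var apiErr smithy.APIError
	if errors.As(err, &apiErr) && apiErr.ErrorCode() == "PreconditionFailed" {
		fmt.Println("lost the conditional write race:", apiErr.ErrorMessage())
		return
	}
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("conditional write succeeded")
}

The tests added below exercise the same semantics end to end, including a two-gateway cluster that forces the conditional checks through the distributed lock path.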

@@ -0,0 +1,135 @@
package delete
import (
"bytes"
"context"
"errors"
"testing"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/aws/smithy-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
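// TestConditionalDeleteIfMatchOnLatestVersion verifies that DeleteObject honors If-Match against the
// current object: a mismatched ETag is rejected with PreconditionFailed and leaves the object readable,
// while the matching ETag succeeds and creates a delete marker on the versioned bucket.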
func TestConditionalDeleteIfMatchOnLatestVersion(t *testing.T) {
client := getTestClient(t)
bucket := createTestBucket(t, client)
defer cleanupBucket(t, client, bucket)
key := "conditional-delete.txt"
putResp, err := client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
Body: bytes.NewReader([]byte("versioned body")),
})
require.NoError(t, err)
require.NotNil(t, putResp.ETag)
_, err = client.DeleteObject(context.TODO(), &s3.DeleteObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
IfMatch: aws.String(`"not-the-current-etag"`),
})
require.Error(t, err, "DeleteObject should reject a mismatched If-Match header")
var apiErr smithy.APIError
if assert.True(t, errors.As(err, &apiErr), "Expected smithy API error for conditional delete") {
assert.Equal(t, "PreconditionFailed", apiErr.ErrorCode())
}
_, err = client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
})
require.NoError(t, err, "Object should remain current after a failed conditional delete")
deleteResp, err := client.DeleteObject(context.TODO(), &s3.DeleteObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
IfMatch: putResp.ETag,
})
require.NoError(t, err)
require.NotNil(t, deleteResp.DeleteMarker)
assert.True(t, *deleteResp.DeleteMarker, "Successful conditional delete on a versioned bucket should create a delete marker")
require.NotNil(t, deleteResp.VersionId)
_, err = client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
})
require.Error(t, err, "Delete marker should hide the current object after a successful conditional delete")
}
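// TestConditionalMultiDeletePerObjectETag verifies that DeleteObjects evaluates ETag preconditions per
// object: the matching entry is deleted while the mismatched entry reports PreconditionFailed and is
// left untouched.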
func TestConditionalMultiDeletePerObjectETag(t *testing.T) {
client := getTestClient(t)
bucket := createTestBucket(t, client)
defer cleanupBucket(t, client, bucket)
okKey := "delete-ok.txt"
failKey := "delete-fail.txt"
okPutResp, err := client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(okKey),
Body: bytes.NewReader([]byte("delete me")),
})
require.NoError(t, err)
require.NotNil(t, okPutResp.ETag)
_, err = client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(failKey),
Body: bytes.NewReader([]byte("keep me")),
})
require.NoError(t, err)
deleteResp, err := client.DeleteObjects(context.TODO(), &s3.DeleteObjectsInput{
Bucket: aws.String(bucket),
Delete: &types.Delete{
Objects: []types.ObjectIdentifier{
{
Key: aws.String(okKey),
ETag: okPutResp.ETag,
},
{
Key: aws.String(failKey),
ETag: aws.String(`"mismatched-etag"`),
},
},
},
})
require.NoError(t, err)
require.Len(t, deleteResp.Deleted, 1, "One object should satisfy its ETag precondition")
require.Len(t, deleteResp.Errors, 1, "One object should report a precondition failure")
deletedKeys := make([]string, 0, len(deleteResp.Deleted))
for _, deleted := range deleteResp.Deleted {
deletedKeys = append(deletedKeys, aws.ToString(deleted.Key))
}
assert.Contains(t, deletedKeys, okKey)
var matchedError *types.Error
for i := range deleteResp.Errors {
if aws.ToString(deleteResp.Errors[i].Key) == failKey {
matchedError = &deleteResp.Errors[i]
break
}
}
if assert.NotNil(t, matchedError, "Expected error entry for failed key") {
assert.Equal(t, "PreconditionFailed", aws.ToString(matchedError.Code))
}
_, err = client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(okKey),
})
require.Error(t, err, "Successfully deleted key should no longer be current")
_, err = client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(failKey),
})
require.NoError(t, err, "Object with mismatched ETag should remain untouched")
}

@@ -0,0 +1,523 @@
package distributed_lock
import (
"bufio"
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/seaweedfs/seaweedfs/test/volume_server/framework"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
const (
distributedLockTestRegion = "us-east-1"
distributedLockTestAccessKey = "some_access_key1"
distributedLockTestSecretKey = "some_secret_key1"
distributedLockTestGroup = "distributed-lock-it"
)
type distributedLockCluster struct {
t testing.TB
baseDir string
configDir string
logsDir string
keepLogs bool
weedBinary string
filerGroup string
s3Config string
masterPort int
masterGrpcPort int
volumePort int
volumeGrpcPort int
filerPorts []int
filerGrpcPorts []int
s3Ports []int
s3GrpcPorts []int
masterCmd *exec.Cmd
volumeCmd *exec.Cmd
filerCmds []*exec.Cmd
s3Cmds []*exec.Cmd
logFiles []*os.File
cleanupOnce sync.Once
}
type s3IdentityConfig struct {
Identities []s3Identity `json:"identities"`
}
type s3Identity struct {
Name string `json:"name"`
Credentials []s3Credential `json:"credentials,omitempty"`
Actions []string `json:"actions"`
}
type s3Credential struct {
AccessKey string `json:"accessKey"`
SecretKey string `json:"secretKey"`
}
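// startDistributedLockCluster boots a small SeaweedFS cluster for these tests: one master, one volume
// server, two filers sharing a filer group, and two S3 gateways. Logs are written under a temp directory
// that is removed on success unless S3_DISTRIBUTED_LOCK_KEEP_LOGS=1.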
func startDistributedLockCluster(t *testing.T) *distributedLockCluster {
t.Helper()
weedBinary, err := framework.FindOrBuildWeedBinary()
require.NoError(t, err, "resolve weed binary")
baseDir, err := os.MkdirTemp("", "seaweedfs_s3_distributed_lock_")
require.NoError(t, err, "create temp directory")
cluster := &distributedLockCluster{
t: t,
baseDir: baseDir,
configDir: filepath.Join(baseDir, "config"),
logsDir: filepath.Join(baseDir, "logs"),
keepLogs: os.Getenv("S3_DISTRIBUTED_LOCK_KEEP_LOGS") == "1",
weedBinary: weedBinary,
filerGroup: distributedLockTestGroup,
filerCmds: make([]*exec.Cmd, 0, 2),
s3Cmds: make([]*exec.Cmd, 0, 2),
}
t.Cleanup(cluster.Stop)
dirs := []string{
cluster.configDir,
cluster.logsDir,
filepath.Join(baseDir, "master"),
filepath.Join(baseDir, "volume"),
filepath.Join(baseDir, "filer0"),
filepath.Join(baseDir, "filer1"),
}
for _, dir := range dirs {
require.NoError(t, os.MkdirAll(dir, 0o755), "create %s", dir)
}
ports, err := allocatePorts(12)
require.NoError(t, err, "allocate ports")
cluster.masterPort = ports[0]
cluster.masterGrpcPort = ports[1]
cluster.volumePort = ports[2]
cluster.volumeGrpcPort = ports[3]
cluster.filerPorts = []int{ports[4], ports[6]}
cluster.filerGrpcPorts = []int{ports[5], ports[7]}
cluster.s3Ports = []int{ports[8], ports[10]}
cluster.s3GrpcPorts = []int{ports[9], ports[11]}
require.NoError(t, cluster.writeSecurityConfig(), "write security config")
require.NoError(t, cluster.writeS3Config(), "write s3 config")
require.NoError(t, cluster.startMaster(), "start master")
require.NoError(t, cluster.waitForHTTP("http://"+cluster.masterHTTPAddress()+"/dir/status", 30*time.Second), "wait for master\n%s", cluster.tailLog("master.log"))
require.NoError(t, cluster.startVolume(), "start volume")
require.NoError(t, cluster.waitForHTTP("http://"+cluster.volumeHTTPAddress()+"/status", 30*time.Second), "wait for volume\n%s", cluster.tailLog("volume.log"))
require.NoError(t, cluster.waitForTCP(cluster.volumeGRPCAddress(), 30*time.Second), "wait for volume grpc\n%s", cluster.tailLog("volume.log"))
for i := 0; i < 2; i++ {
require.NoError(t, cluster.startFiler(i), "start filer %d", i)
require.NoError(t, cluster.waitForTCP(cluster.filerGRPCAddress(i), 30*time.Second), "wait for filer %d grpc\n%s", i, cluster.tailLog(fmt.Sprintf("filer%d.log", i)))
}
require.NoError(t, cluster.waitForFilerCount(2, 30*time.Second), "wait for filer group registration")
for i := 0; i < 2; i++ {
require.NoError(t, cluster.startS3(i), "start s3 %d", i)
client := cluster.newS3Client(t, cluster.s3Endpoint(i))
require.NoError(t, cluster.waitForS3Ready(client, 30*time.Second), "wait for s3 %d\n%s", i, cluster.tailLog(fmt.Sprintf("s3-%d.log", i)))
}
return cluster
}
func (c *distributedLockCluster) Stop() {
if c == nil {
return
}
c.cleanupOnce.Do(func() {
for i := len(c.s3Cmds) - 1; i >= 0; i-- {
stopProcess(c.s3Cmds[i])
}
for i := len(c.filerCmds) - 1; i >= 0; i-- {
stopProcess(c.filerCmds[i])
}
stopProcess(c.volumeCmd)
stopProcess(c.masterCmd)
for _, f := range c.logFiles {
_ = f.Close()
}
if !c.keepLogs && !c.t.Failed() {
_ = os.RemoveAll(c.baseDir)
} else if c.baseDir != "" {
c.t.Logf("distributed lock integration logs kept at %s", c.baseDir)
}
})
}
func (c *distributedLockCluster) masterHTTPAddress() string {
return net.JoinHostPort("127.0.0.1", strconv.Itoa(c.masterPort))
}
func (c *distributedLockCluster) masterGRPCAddress() string {
return net.JoinHostPort("127.0.0.1", strconv.Itoa(c.masterGrpcPort))
}
func (c *distributedLockCluster) volumeHTTPAddress() string {
return net.JoinHostPort("127.0.0.1", strconv.Itoa(c.volumePort))
}
func (c *distributedLockCluster) volumeGRPCAddress() string {
return net.JoinHostPort("127.0.0.1", strconv.Itoa(c.volumeGrpcPort))
}
func (c *distributedLockCluster) filerServerAddress(index int) pb.ServerAddress {
return pb.NewServerAddress("127.0.0.1", c.filerPorts[index], c.filerGrpcPorts[index])
}
func (c *distributedLockCluster) filerGRPCAddress(index int) string {
return net.JoinHostPort("127.0.0.1", strconv.Itoa(c.filerGrpcPorts[index]))
}
func (c *distributedLockCluster) s3Endpoint(index int) string {
return fmt.Sprintf("http://127.0.0.1:%d", c.s3Ports[index])
}
func (c *distributedLockCluster) startMaster() error {
logFile, err := c.openLog("master.log")
if err != nil {
return err
}
args := []string{
"-config_dir=" + c.configDir,
"master",
"-ip=127.0.0.1",
"-ip.bind=127.0.0.1",
"-port=" + strconv.Itoa(c.masterPort),
"-port.grpc=" + strconv.Itoa(c.masterGrpcPort),
"-mdir=" + filepath.Join(c.baseDir, "master"),
"-peers=none",
"-volumeSizeLimitMB=32",
"-defaultReplication=000",
}
c.masterCmd = exec.Command(c.weedBinary, args...)
c.masterCmd.Dir = c.baseDir
c.masterCmd.Stdout = logFile
c.masterCmd.Stderr = logFile
return c.masterCmd.Start()
}
func (c *distributedLockCluster) startVolume() error {
logFile, err := c.openLog("volume.log")
if err != nil {
return err
}
masterAddress := string(pb.NewServerAddress("127.0.0.1", c.masterPort, c.masterGrpcPort))
args := []string{
"-config_dir=" + c.configDir,
"volume",
"-ip=127.0.0.1",
"-ip.bind=127.0.0.1",
"-port=" + strconv.Itoa(c.volumePort),
"-port.grpc=" + strconv.Itoa(c.volumeGrpcPort),
"-dir=" + filepath.Join(c.baseDir, "volume"),
"-max=16",
"-master=" + masterAddress,
"-readMode=proxy",
}
c.volumeCmd = exec.Command(c.weedBinary, args...)
c.volumeCmd.Dir = c.baseDir
c.volumeCmd.Stdout = logFile
c.volumeCmd.Stderr = logFile
return c.volumeCmd.Start()
}
func (c *distributedLockCluster) startFiler(index int) error {
logFile, err := c.openLog(fmt.Sprintf("filer%d.log", index))
if err != nil {
return err
}
masterAddress := string(pb.NewServerAddress("127.0.0.1", c.masterPort, c.masterGrpcPort))
args := []string{
"-config_dir=" + c.configDir,
"filer",
"-master=" + masterAddress,
"-filerGroup=" + c.filerGroup,
"-ip=127.0.0.1",
"-ip.bind=127.0.0.1",
"-port=" + strconv.Itoa(c.filerPorts[index]),
"-port.grpc=" + strconv.Itoa(c.filerGrpcPorts[index]),
"-defaultStoreDir=" + filepath.Join(c.baseDir, fmt.Sprintf("filer%d", index)),
}
cmd := exec.Command(c.weedBinary, args...)
cmd.Dir = c.baseDir
cmd.Stdout = logFile
cmd.Stderr = logFile
if err := cmd.Start(); err != nil {
return err
}
c.filerCmds = append(c.filerCmds, cmd)
return nil
}
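// startS3 launches an S3 gateway configured with both filers; odd-indexed gateways list the filers in
// reverse order so the two gateways do not share the same preferred filer.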
func (c *distributedLockCluster) startS3(index int) error {
logFile, err := c.openLog(fmt.Sprintf("s3-%d.log", index))
if err != nil {
return err
}
filers := []string{string(c.filerServerAddress(0)), string(c.filerServerAddress(1))}
if index%2 == 1 {
filers[0], filers[1] = filers[1], filers[0]
}
args := []string{
"-config_dir=" + c.configDir,
"s3",
"-ip.bind=127.0.0.1",
"-port=" + strconv.Itoa(c.s3Ports[index]),
"-port.grpc=" + strconv.Itoa(c.s3GrpcPorts[index]),
"-port.iceberg=0",
"-filer=" + strings.Join(filers, ","),
"-config=" + c.s3Config,
"-iam.readOnly=false",
}
cmd := exec.Command(c.weedBinary, args...)
cmd.Dir = c.baseDir
cmd.Stdout = logFile
cmd.Stderr = logFile
if err := cmd.Start(); err != nil {
return err
}
c.s3Cmds = append(c.s3Cmds, cmd)
return nil
}
func (c *distributedLockCluster) writeSecurityConfig() error {
return os.WriteFile(filepath.Join(c.configDir, "security.toml"), []byte("# generated for distributed lock integration tests\n"), 0o644)
}
func (c *distributedLockCluster) writeS3Config() error {
configPath := filepath.Join(c.configDir, "s3.json")
payload := s3IdentityConfig{
Identities: []s3Identity{
{
Name: "distributed-lock-admin",
Credentials: []s3Credential{
{
AccessKey: distributedLockTestAccessKey,
SecretKey: distributedLockTestSecretKey,
},
},
Actions: []string{"Admin", "Read", "List", "Tagging", "Write"},
},
},
}
data, err := json.MarshalIndent(payload, "", " ")
if err != nil {
return err
}
if err := os.WriteFile(configPath, data, 0o644); err != nil {
return err
}
c.s3Config = configPath
return nil
}
func (c *distributedLockCluster) newS3Client(t testing.TB, endpoint string) *s3.Client {
t.Helper()
cfg, err := config.LoadDefaultConfig(context.Background(),
config.WithRegion(distributedLockTestRegion),
config.WithRetryMaxAttempts(1),
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
distributedLockTestAccessKey,
distributedLockTestSecretKey,
"",
)),
)
require.NoError(t, err, "load aws config")
return s3.NewFromConfig(cfg, func(o *s3.Options) {
o.BaseEndpoint = aws.String(endpoint)
o.UsePathStyle = true
})
}
func (c *distributedLockCluster) waitForS3Ready(client *s3.Client, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, err := client.ListBuckets(ctx, &s3.ListBucketsInput{})
cancel()
if err == nil {
return nil
}
time.Sleep(200 * time.Millisecond)
}
return fmt.Errorf("timed out waiting for s3 readiness")
}
func (c *distributedLockCluster) waitForFilerCount(expected int, timeout time.Duration) error {
conn, err := grpc.NewClient(c.masterGRPCAddress(), grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return err
}
defer conn.Close()
client := master_pb.NewSeaweedClient(conn)
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
resp, err := client.ListClusterNodes(ctx, &master_pb.ListClusterNodesRequest{
ClientType: "filer",
FilerGroup: c.filerGroup,
})
cancel()
if err == nil && len(resp.ClusterNodes) >= expected {
return nil
}
time.Sleep(200 * time.Millisecond)
}
return fmt.Errorf("timed out waiting for %d filers in group %q", expected, c.filerGroup)
}
func (c *distributedLockCluster) waitForHTTP(url string, timeout time.Duration) error {
client := &net.Dialer{Timeout: time.Second}
httpClient := &httpClientWithDialer{dialer: client}
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
if err := httpClient.Get(url); err == nil {
return nil
}
time.Sleep(200 * time.Millisecond)
}
return fmt.Errorf("timed out waiting for http %s", url)
}
func (c *distributedLockCluster) waitForTCP(addr string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
conn, err := net.DialTimeout("tcp", addr, time.Second)
if err == nil {
_ = conn.Close()
return nil
}
time.Sleep(200 * time.Millisecond)
}
return fmt.Errorf("timed out waiting for tcp %s", addr)
}
func (c *distributedLockCluster) openLog(name string) (*os.File, error) {
f, err := os.Create(filepath.Join(c.logsDir, name))
if err != nil {
return nil, err
}
c.logFiles = append(c.logFiles, f)
return f, nil
}
func (c *distributedLockCluster) tailLog(name string) string {
f, err := os.Open(filepath.Join(c.logsDir, name))
if err != nil {
return ""
}
defer f.Close()
scanner := bufio.NewScanner(f)
lines := make([]string, 0, 40)
for scanner.Scan() {
lines = append(lines, scanner.Text())
if len(lines) > 40 {
lines = lines[1:]
}
}
return strings.Join(lines, "\n")
}
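// allocatePorts reserves count free localhost ports by listening on port 0, records the chosen ports,
// then closes all listeners so the cluster processes can bind them. A small window remains in which
// another process could grab a released port.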
func allocatePorts(count int) ([]int, error) {
listeners := make([]net.Listener, 0, count)
ports := make([]int, 0, count)
for i := 0; i < count; i++ {
l, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
for _, openListener := range listeners {
_ = openListener.Close()
}
return nil, err
}
listeners = append(listeners, l)
ports = append(ports, l.Addr().(*net.TCPAddr).Port)
}
for _, l := range listeners {
_ = l.Close()
}
return ports, nil
}
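// stopProcess sends an interrupt and waits up to 10 seconds for the process to exit before force-killing it.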
func stopProcess(cmd *exec.Cmd) {
if cmd == nil || cmd.Process == nil {
return
}
_ = cmd.Process.Signal(os.Interrupt)
done := make(chan error, 1)
go func() {
done <- cmd.Wait()
}()
select {
case <-done:
case <-time.After(10 * time.Second):
_ = cmd.Process.Kill()
<-done
}
}
type httpClientWithDialer struct {
dialer *net.Dialer
}
func (h *httpClientWithDialer) Get(url string) error {
client := &http.Client{
Timeout: time.Second,
Transport: &http.Transport{
DialContext: h.dialer.DialContext,
},
}
resp, err := client.Get(url)
if err != nil {
return err
}
_ = resp.Body.Close()
return nil
}

@@ -0,0 +1,181 @@
package distributed_lock
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"sort"
"strings"
"sync"
"testing"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/smithy-go"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
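// TestConditionalPutIfNoneMatchDistributedLockAcrossS3Gateways races If-None-Match: "*" PutObject calls
// issued through both S3 gateways, once per filer lock owner, and requires exactly one winner per key
// with every losing attempt failing as a precondition error.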
func TestConditionalPutIfNoneMatchDistributedLockAcrossS3Gateways(t *testing.T) {
if testing.Short() {
t.Skip("skipping distributed lock integration test in short mode")
}
cluster := startDistributedLockCluster(t)
clientA := cluster.newS3Client(t, cluster.s3Endpoint(0))
clientB := cluster.newS3Client(t, cluster.s3Endpoint(1))
bucket := fmt.Sprintf("distributed-lock-%d", time.Now().UnixNano())
_, err := clientA.CreateBucket(context.Background(), &s3.CreateBucketInput{
Bucket: aws.String(bucket),
})
require.NoError(t, err)
require.Eventually(t, func() bool {
_, err := clientB.HeadBucket(context.Background(), &s3.HeadBucketInput{
Bucket: aws.String(bucket),
})
return err == nil
}, 30*time.Second, 200*time.Millisecond, "bucket should replicate to the second filer-backed gateway")
keysByOwner := cluster.findLockOwnerKeys(bucket, "conditional-put")
require.Len(t, keysByOwner, len(cluster.filerPorts), "should exercise both filer lock owners")
for owner, key := range keysByOwner {
owner := owner
key := key
t.Run(lockOwnerLabel(owner), func(t *testing.T) {
runConditionalPutRace(t, []s3RaceClient{
{name: "s3-a", client: clientA},
{name: "s3-b", client: clientB},
}, bucket, key)
})
}
}
type s3RaceClient struct {
name string
client *s3.Client
}
type putAttemptResult struct {
clientName string
body string
err error
}
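// runConditionalPutRace fires two concurrent conditional puts per client against the same key and asserts
// that exactly one succeeds, all remaining attempts fail with PreconditionFailed, and the stored body
// matches the winning request.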
func runConditionalPutRace(t *testing.T, clients []s3RaceClient, bucket, key string) {
t.Helper()
start := make(chan struct{})
results := make(chan putAttemptResult, len(clients)*2)
var wg sync.WaitGroup
for _, client := range clients {
for attempt := 0; attempt < 2; attempt++ {
wg.Add(1)
body := fmt.Sprintf("%s-attempt-%d", client.name, attempt)
go func(client s3RaceClient, body string) {
defer wg.Done()
<-start
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
_, err := client.client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
IfNoneMatch: aws.String("*"),
Body: bytes.NewReader([]byte(body)),
})
results <- putAttemptResult{
clientName: client.name,
body: body,
err: err,
}
}(client, body)
}
}
close(start)
wg.Wait()
close(results)
successes := 0
preconditionFailures := 0
winnerBody := ""
unexpectedErrors := make([]string, 0)
for result := range results {
if result.err == nil {
successes++
winnerBody = result.body
continue
}
if isPreconditionFailed(result.err) {
preconditionFailures++
continue
}
unexpectedErrors = append(unexpectedErrors, fmt.Sprintf("%s: %v", result.clientName, result.err))
}
require.Empty(t, unexpectedErrors, "unexpected race errors")
require.Equal(t, 1, successes, "exactly one write should win")
require.Equal(t, len(clients)*2-1, preconditionFailures, "all losing writes should fail with 412")
object, err := clients[0].client.GetObject(context.Background(), &s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
})
require.NoError(t, err)
defer object.Body.Close()
data, err := io.ReadAll(object.Body)
require.NoError(t, err)
assert.Equal(t, winnerBody, string(data), "stored object body should match the successful request")
}
func isPreconditionFailed(err error) bool {
var apiErr smithy.APIError
return errors.As(err, &apiErr) && apiErr.ErrorCode() == "PreconditionFailed"
}
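// findLockOwnerKeys probes up to 1024 candidate object keys and returns, for each filer, one key whose
// computed lock owner is that filer, so the race is exercised against every lock owner.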
func (c *distributedLockCluster) findLockOwnerKeys(bucket, prefix string) map[pb.ServerAddress]string {
owners := make([]pb.ServerAddress, 0, len(c.filerPorts))
for i := range c.filerPorts {
owners = append(owners, c.filerServerAddress(i))
}
sort.Slice(owners, func(i, j int) bool {
return owners[i] < owners[j]
})
keysByOwner := make(map[pb.ServerAddress]string, len(owners))
for i := 0; i < 1024 && len(keysByOwner) < len(owners); i++ {
key := fmt.Sprintf("%s-%03d.txt", prefix, i)
lockOwner := ownerForObjectLock(bucket, key, owners)
if _, exists := keysByOwner[lockOwner]; !exists {
keysByOwner[lockOwner] = key
}
}
return keysByOwner
}
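// ownerForObjectLock maps an object to a filer by hashing the per-object write-lock key
// ("s3.object.write:/buckets/<bucket>/<key>") modulo the sorted owner list; this is assumed to mirror the
// gateway's server-side owner selection so the subtests actually cover both lock owners.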
func ownerForObjectLock(bucket, object string, owners []pb.ServerAddress) pb.ServerAddress {
lockKey := fmt.Sprintf("s3.object.write:/buckets/%s/%s", bucket, s3_constants.NormalizeObjectKey(object))
hash := util.HashStringToLong(lockKey)
if hash < 0 {
hash = -hash
}
return owners[hash%int64(len(owners))]
}
func lockOwnerLabel(owner pb.ServerAddress) string {
replacer := strings.NewReplacer(":", "_", ".", "_")
return "owner_" + replacer.Replace(string(owner))
}

@@ -0,0 +1,147 @@
package s3api
import (
"bytes"
"context"
"fmt"
"io"
"net/url"
"testing"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func versioningCopySource(bucketName, key string) string {
return fmt.Sprintf("%s/%s", bucketName, url.PathEscape(key))
}
func suspendVersioning(t *testing.T, client *s3.Client, bucketName string) {
_, err := client.PutBucketVersioning(context.TODO(), &s3.PutBucketVersioningInput{
Bucket: aws.String(bucketName),
VersioningConfiguration: &types.VersioningConfiguration{
Status: types.BucketVersioningStatusSuspended,
},
})
require.NoError(t, err)
}
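// TestVersioningSelfCopyMetadataReplaceCreatesNewVersion checks that, with versioning enabled, a self-copy
// using MetadataDirective=REPLACE creates a new current version carrying the new metadata while the
// original version's metadata and the object body remain unchanged.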
func TestVersioningSelfCopyMetadataReplaceCreatesNewVersion(t *testing.T) {
client := getS3Client(t)
bucketName := getNewBucketName()
createBucket(t, client, bucketName)
defer deleteBucket(t, client, bucketName)
enableVersioning(t, client, bucketName)
checkVersioningStatus(t, client, bucketName, types.BucketVersioningStatusEnabled)
objectKey := "self-copy-versioned.txt"
initialContent := []byte("copy me without changing the body")
putResp, err := client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
Body: bytes.NewReader(initialContent),
Metadata: map[string]string{"stage": "one"},
})
require.NoError(t, err)
require.NotNil(t, putResp.VersionId)
copyResp, err := client.CopyObject(context.TODO(), &s3.CopyObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
CopySource: aws.String(versioningCopySource(bucketName, objectKey)),
Metadata: map[string]string{"stage": "two"},
MetadataDirective: types.MetadataDirectiveReplace,
})
require.NoError(t, err, "Self-copy with metadata replacement should succeed")
require.NotNil(t, copyResp.VersionId, "Versioned self-copy should create a new version")
require.NotEqual(t, *putResp.VersionId, *copyResp.VersionId, "Self-copy should create a distinct version")
headLatestResp, err := client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
})
require.NoError(t, err)
assert.Equal(t, "two", headLatestResp.Metadata["stage"], "Latest version should expose replaced metadata")
headOriginalResp, err := client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
VersionId: putResp.VersionId,
})
require.NoError(t, err)
assert.Equal(t, "one", headOriginalResp.Metadata["stage"], "Previous version metadata should remain intact")
getResp, err := client.GetObject(context.TODO(), &s3.GetObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
})
require.NoError(t, err)
defer getResp.Body.Close()
body, err := io.ReadAll(getResp.Body)
require.NoError(t, err)
assert.Equal(t, initialContent, body, "Self-copy should not alter the object body")
versionsResp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{
Bucket: aws.String(bucketName),
Prefix: aws.String(objectKey),
})
require.NoError(t, err)
require.Len(t, versionsResp.Versions, 2, "Self-copy should append a new current version")
assert.Equal(t, *copyResp.VersionId, *versionsResp.Versions[0].VersionId, "New copy version should be latest")
}
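// TestVersioningSelfCopyMetadataReplaceSuspendedKeepsNullVersion checks that under suspended versioning a
// self-copy with REPLACE metadata updates the existing null version in place: no version header on the
// response and still a single "null" version after the copy.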
func TestVersioningSelfCopyMetadataReplaceSuspendedKeepsNullVersion(t *testing.T) {
client := getS3Client(t)
bucketName := getNewBucketName()
createBucket(t, client, bucketName)
defer deleteBucket(t, client, bucketName)
enableVersioning(t, client, bucketName)
suspendVersioning(t, client, bucketName)
checkVersioningStatus(t, client, bucketName, types.BucketVersioningStatusSuspended)
objectKey := "self-copy-suspended.txt"
initialContent := []byte("null version content")
_, err := client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
Body: bytes.NewReader(initialContent),
Metadata: map[string]string{"stage": "one"},
})
require.NoError(t, err)
copyResp, err := client.CopyObject(context.TODO(), &s3.CopyObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
CopySource: aws.String(versioningCopySource(bucketName, objectKey)),
Metadata: map[string]string{"stage": "two"},
MetadataDirective: types.MetadataDirectiveReplace,
})
require.NoError(t, err, "Suspended self-copy with metadata replacement should succeed")
assert.Nil(t, copyResp.VersionId, "Suspended versioning should not return a version header for the current null version")
headResp, err := client.HeadObject(context.TODO(), &s3.HeadObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
})
require.NoError(t, err)
assert.Equal(t, "two", headResp.Metadata["stage"], "Null current version should be updated in place")
versionsResp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{
Bucket: aws.String(bucketName),
Prefix: aws.String(objectKey),
})
require.NoError(t, err)
require.Len(t, versionsResp.Versions, 1, "Suspended self-copy should keep a single null current version")
require.NotNil(t, versionsResp.Versions[0].VersionId)
assert.Equal(t, "null", *versionsResp.Versions[0].VersionId, "Suspended self-copy should preserve null-version semantics")
assert.True(t, *versionsResp.Versions[0].IsLatest, "Null version should remain latest")
}

@@ -0,0 +1,82 @@
package s3api
import (
"bytes"
"context"
"io"
"testing"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
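// TestSuspendedDeleteCreatesDeleteMarker verifies that a plain DeleteObject on a suspended-versioning
// bucket still creates a delete marker (reported via DeleteMarker and VersionId), hides the current
// object, and leaves the earlier versioned object retrievable by its version ID.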
func TestSuspendedDeleteCreatesDeleteMarker(t *testing.T) {
client := getS3Client(t)
bucketName := getNewBucketName()
createBucket(t, client, bucketName)
defer deleteBucket(t, client, bucketName)
enableVersioning(t, client, bucketName)
objectKey := "suspended-delete-marker.txt"
versionedResp, err := client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
Body: bytes.NewReader([]byte("versioned-content")),
})
require.NoError(t, err)
require.NotNil(t, versionedResp.VersionId)
suspendVersioning(t, client, bucketName)
_, err = client.PutObject(context.TODO(), &s3.PutObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
Body: bytes.NewReader([]byte("null-version-content")),
})
require.NoError(t, err)
deleteResp, err := client.DeleteObject(context.TODO(), &s3.DeleteObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
})
require.NoError(t, err)
require.NotNil(t, deleteResp.DeleteMarker)
assert.True(t, *deleteResp.DeleteMarker)
require.NotNil(t, deleteResp.VersionId)
listResp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{
Bucket: aws.String(bucketName),
})
require.NoError(t, err)
require.Len(t, listResp.DeleteMarkers, 1)
deleteMarker := listResp.DeleteMarkers[0]
require.NotNil(t, deleteMarker.Key)
assert.Equal(t, objectKey, *deleteMarker.Key)
require.NotNil(t, deleteMarker.VersionId)
assert.Equal(t, *deleteResp.VersionId, *deleteMarker.VersionId)
require.NotNil(t, deleteMarker.IsLatest)
assert.True(t, *deleteMarker.IsLatest)
_, err = client.GetObject(context.TODO(), &s3.GetObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
})
require.Error(t, err)
getVersionedResp, err := client.GetObject(context.TODO(), &s3.GetObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
VersionId: versionedResp.VersionId,
})
require.NoError(t, err)
defer getVersionedResp.Body.Close()
body, err := io.ReadAll(getVersionedResp.Body)
require.NoError(t, err)
assert.Equal(t, "versioned-content", string(body))
}

@@ -499,12 +499,16 @@ func TestMultipartUploadDeleteMarkerListBehavior(t *testing.T) {
t.Logf("Successfully retrieved version %s after delete marker", multipartVersionId)
// Delete the delete marker to "undelete" the object
_, err = client.DeleteObject(context.TODO(), &s3.DeleteObjectInput{
undeleteResp, err := client.DeleteObject(context.TODO(), &s3.DeleteObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
VersionId: aws.String(deleteMarkerVersionId),
})
require.NoError(t, err, "Failed to delete the delete marker")
require.NotNil(t, undeleteResp.DeleteMarker, "Deleting a delete marker version should report DeleteMarker=true")
assert.True(t, *undeleteResp.DeleteMarker, "Deleting a delete marker version should report DeleteMarker=true")
require.NotNil(t, undeleteResp.VersionId, "Deleting a delete marker version should echo the version ID")
assert.Equal(t, deleteMarkerVersionId, *undeleteResp.VersionId, "DeleteObject should report the deleted delete-marker version ID")
// ListObjectsV2 should show the object again
listAfterUndelete, err := client.ListObjectsV2(context.TODO(), &s3.ListObjectsV2Input{
@@ -518,3 +522,76 @@ func TestMultipartUploadDeleteMarkerListBehavior(t *testing.T) {
t.Logf("Object restored after delete marker removal, ETag=%s", multipartETag)
}
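// TestVersioningCompleteMultipartUploadIsIdempotent repeats CompleteMultipartUpload with the same parts
// and expects the second call to return the original ETag and version ID without creating a duplicate
// version or any delete markers.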
func TestVersioningCompleteMultipartUploadIsIdempotent(t *testing.T) {
client := getS3Client(t)
bucketName := getNewBucketName()
createBucket(t, client, bucketName)
defer deleteBucket(t, client, bucketName)
enableVersioning(t, client, bucketName)
checkVersioningStatus(t, client, bucketName, types.BucketVersioningStatusEnabled)
objectKey := "multipart-idempotent-object"
createResp, err := client.CreateMultipartUpload(context.TODO(), &s3.CreateMultipartUploadInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
})
require.NoError(t, err, "Failed to create multipart upload")
partSize := 5 * 1024 * 1024
part1Data := bytes.Repeat([]byte("i"), partSize)
part2Data := bytes.Repeat([]byte("j"), partSize)
uploadPart1Resp, err := client.UploadPart(context.TODO(), &s3.UploadPartInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
UploadId: createResp.UploadId,
PartNumber: aws.Int32(1),
Body: bytes.NewReader(part1Data),
})
require.NoError(t, err, "Failed to upload first part")
uploadPart2Resp, err := client.UploadPart(context.TODO(), &s3.UploadPartInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
UploadId: createResp.UploadId,
PartNumber: aws.Int32(2),
Body: bytes.NewReader(part2Data),
})
require.NoError(t, err, "Failed to upload second part")
completeInput := &s3.CompleteMultipartUploadInput{
Bucket: aws.String(bucketName),
Key: aws.String(objectKey),
UploadId: createResp.UploadId,
MultipartUpload: &types.CompletedMultipartUpload{
Parts: []types.CompletedPart{
{ETag: uploadPart1Resp.ETag, PartNumber: aws.Int32(1)},
{ETag: uploadPart2Resp.ETag, PartNumber: aws.Int32(2)},
},
},
}
firstCompleteResp, err := client.CompleteMultipartUpload(context.TODO(), completeInput)
require.NoError(t, err, "First CompleteMultipartUpload should succeed")
require.NotNil(t, firstCompleteResp.ETag)
require.NotNil(t, firstCompleteResp.VersionId)
secondCompleteResp, err := client.CompleteMultipartUpload(context.TODO(), completeInput)
require.NoError(t, err, "Repeated CompleteMultipartUpload should return the existing object instead of creating a second version")
require.NotNil(t, secondCompleteResp.ETag)
require.NotNil(t, secondCompleteResp.VersionId, "Repeated complete should report the existing version ID")
assert.Equal(t, *firstCompleteResp.ETag, *secondCompleteResp.ETag, "Repeated complete should report the same ETag")
assert.Equal(t, *firstCompleteResp.VersionId, *secondCompleteResp.VersionId, "Repeated complete should report the same version ID")
versionsResp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{
Bucket: aws.String(bucketName),
Prefix: aws.String(objectKey),
})
require.NoError(t, err, "Failed to list object versions")
require.Len(t, versionsResp.Versions, 1, "Repeated completion must not create a duplicate version")
assert.Equal(t, *firstCompleteResp.VersionId, *versionsResp.Versions[0].VersionId, "The original multipart version should remain current")
assert.Empty(t, versionsResp.DeleteMarkers, "Repeated completion should not create delete markers")
}