Files
seaweedFS/test/s3/distributed_lock/distributed_lock_test.go
Chris Lu 98714b9f70 fix(test): address flaky S3 distributed lock integration test (#8888)
* fix(test): address flaky S3 distributed lock integration test

Two root causes:

1. Lock ring convergence race: After waitForFilerCount(2) confirms the
   master sees both filers, there's a window where filer0's lock ring
   still only contains itself (master's LockRingUpdate broadcast is
   delayed by the 1s stabilization timer). During this window filer0
   considers itself primary for ALL keys, so both filers can
   independently grant the same lock.

   Fix: Add waitForLockRingConverged() that acquires the same lock
   through both filers and verifies mutual exclusion before proceeding.

2. Hash function mismatch: ownerForObjectLock used util.HashStringToLong
   (MD5 + modulo) to predict lock owners, but the production DLM uses
   CRC32 consistent hashing via HashRing. This meant the test could
   pick keys that route to the same filer, not exercising the
   cross-filer coordination it intended to test.

   Fix: Use lock_manager.NewHashRing + GetPrimary() to match production
   routing exactly.

* fix(test): verify lock denial reason in convergence check

Ensure the convergence check only returns true when the second lock
attempt is denied specifically because the lock is already owned,
avoiding false positives from transient errors.

* fix(test): check one key per primary filer in convergence wait

A single arbitrary key can false-pass: if its real primary is the filer
with the stale ring, mutual exclusion holds trivially because that filer
IS the correct primary. Generate one test key per distinct primary using
the same consistent-hash ring as production, so a stale ring on any
filer is caught deterministically.
2026-04-02 13:11:04 -07:00

171 lines
4.7 KiB
Go

package distributed_lock
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strings"
"sync"
"testing"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/smithy-go"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestConditionalPutIfNoneMatchDistributedLockAcrossS3Gateways verifies that
// conditional writes (If-None-Match: "*") are mutually exclusive even when the
// two racing requests enter through different S3 gateways, each backed by its
// own filer. One object key is exercised per distinct filer lock owner so the
// cross-filer coordination path is covered deterministically.
func TestConditionalPutIfNoneMatchDistributedLockAcrossS3Gateways(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping distributed lock integration test in short mode")
	}

	cluster := startDistributedLockCluster(t)
	gatewayA := cluster.newS3Client(t, cluster.s3Endpoint(0))
	gatewayB := cluster.newS3Client(t, cluster.s3Endpoint(1))

	bucket := fmt.Sprintf("distributed-lock-%d", time.Now().UnixNano())
	_, err := gatewayA.CreateBucket(context.Background(), &s3.CreateBucketInput{
		Bucket: aws.String(bucket),
	})
	require.NoError(t, err)

	// The bucket must be visible through the second gateway before racing.
	require.Eventually(t, func() bool {
		_, headErr := gatewayB.HeadBucket(context.Background(), &s3.HeadBucketInput{
			Bucket: aws.String(bucket),
		})
		return headErr == nil
	}, 30*time.Second, 200*time.Millisecond, "bucket should replicate to the second filer-backed gateway")

	keysByOwner := cluster.findLockOwnerKeys(bucket, "conditional-put")
	require.Len(t, keysByOwner, len(cluster.filerPorts), "should exercise both filer lock owners")

	for owner, key := range keysByOwner {
		owner, key := owner, key
		t.Run(lockOwnerLabel(owner), func(t *testing.T) {
			runConditionalPutRace(t, []s3RaceClient{
				{name: "s3-a", client: gatewayA},
				{name: "s3-b", client: gatewayB},
			}, bucket, key)
		})
	}
}
// s3RaceClient pairs an S3 client with a short label used when reporting
// race outcomes, so failures identify which gateway issued the request.
type s3RaceClient struct {
	name   string     // label included in result/error messages (e.g. "s3-a")
	client *s3.Client // client pointed at one specific S3 gateway endpoint
}
// putAttemptResult records the outcome of one racing conditional PutObject
// attempt: which client made it, the body it tried to write, and the error
// (nil on success, PreconditionFailed for the expected losers).
type putAttemptResult struct {
	clientName string // name of the s3RaceClient that made the attempt
	body       string // payload sent with the PutObject request
	err        error  // nil if the write won the race
}
// runConditionalPutRace fires one conditional PutObject (If-None-Match: "*")
// per client at the same key simultaneously, then asserts that exactly one
// write succeeded, every other write failed with 412 Precondition Failed, and
// the stored object body belongs to the winner.
func runConditionalPutRace(t *testing.T, clients []s3RaceClient, bucket, key string) {
	t.Helper()

	startGate := make(chan struct{})
	outcomes := make(chan putAttemptResult, len(clients))
	var wg sync.WaitGroup

	for i := range clients {
		wg.Add(1)
		racer := clients[i]
		payload := fmt.Sprintf("%s-race", racer.name)
		go func(racer s3RaceClient, payload string) {
			defer wg.Done()
			<-startGate // hold every writer until all goroutines are ready
			ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
			defer cancel()
			_, putErr := racer.client.PutObject(ctx, &s3.PutObjectInput{
				Bucket:      aws.String(bucket),
				Key:         aws.String(key),
				IfNoneMatch: aws.String("*"),
				Body:        bytes.NewReader([]byte(payload)),
			})
			outcomes <- putAttemptResult{
				clientName: racer.name,
				body:       payload,
				err:        putErr,
			}
		}(racer, payload)
	}

	close(startGate) // release all writers at once
	wg.Wait()
	close(outcomes)

	var (
		wins             int
		denied           int
		winnerBody       string
		unexpectedErrors []string
	)
	for outcome := range outcomes {
		switch {
		case outcome.err == nil:
			wins++
			winnerBody = outcome.body
		case isPreconditionFailed(outcome.err):
			denied++
		default:
			unexpectedErrors = append(unexpectedErrors, fmt.Sprintf("%s: %v", outcome.clientName, outcome.err))
		}
	}
	require.Empty(t, unexpectedErrors, "unexpected race errors")
	require.Equal(t, 1, wins, "exactly one write should win")
	require.Equal(t, len(clients)-1, denied, "all losing writes should fail with 412")

	// Read the object back through the first client and confirm the winning
	// payload is what was durably stored.
	object, err := clients[0].client.GetObject(context.Background(), &s3.GetObjectInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
	})
	require.NoError(t, err)
	defer object.Body.Close()
	data, err := io.ReadAll(object.Body)
	require.NoError(t, err)
	assert.Equal(t, winnerBody, string(data), "stored object body should match the successful request")
}
// isPreconditionFailed reports whether err is an S3 API error with the
// "PreconditionFailed" code — the 412 returned when If-None-Match: "*" loses.
func isPreconditionFailed(err error) bool {
	var apiErr smithy.APIError
	if !errors.As(err, &apiErr) {
		return false
	}
	return apiErr.ErrorCode() == "PreconditionFailed"
}
// findLockOwnerKeys selects one object key per distinct filer lock owner,
// routing candidate keys through the same consistent-hash ring
// (lock_manager.NewHashRing / GetPrimary) that the production distributed lock
// manager uses, so test keys hash exactly as real lock requests do. It probes
// up to 1024 candidates of the form "<prefix>-NNN.txt"; callers should assert
// the returned map covers every filer.
func (c *distributedLockCluster) findLockOwnerKeys(bucket, prefix string) map[pb.ServerAddress]string {
	filers := make([]pb.ServerAddress, 0, len(c.filerPorts))
	for i := range c.filerPorts {
		filers = append(filers, c.filerServerAddress(i))
	}

	ring := lock_manager.NewHashRing(lock_manager.DefaultVnodeCount)
	ring.SetServers(filers)

	keysByOwner := make(map[pb.ServerAddress]string, len(filers))
	for candidate := 0; candidate < 1024; candidate++ {
		if len(keysByOwner) == len(filers) {
			break // found a key for every owner
		}
		objectKey := fmt.Sprintf("%s-%03d.txt", prefix, candidate)
		// Build the lock key in the same format the write path hashes.
		lockKey := fmt.Sprintf("s3.object.write:/buckets/%s/%s", bucket, s3_constants.NormalizeObjectKey(objectKey))
		primary := ring.GetPrimary(lockKey)
		if _, taken := keysByOwner[primary]; !taken {
			keysByOwner[primary] = objectKey
		}
	}
	return keysByOwner
}
// lockOwnerLabel converts a filer address into a subtest-name-safe label,
// replacing ":" and "." (illegal/awkward in test names) with underscores.
func lockOwnerLabel(owner pb.ServerAddress) string {
	sanitized := strings.NewReplacer(":", "_", ".", "_").Replace(string(owner))
	return "owner_" + sanitized
}