test: fix master client timeout causing test hangs (#7715)
* test: fix master client timeout causing test hangs

  Use the main test context for KeepConnectedToMaster instead of creating a
  separate 60s context. The tests have 180s outer timeouts, but the master
  client was disconnecting after 60s, causing subsequent commands to hang
  waiting for reconnection.

* test: add -peers=none to all test masters and a timeout for lock

  - Add the -peers=none flag to all master servers for faster startup
  - Add a tryLockWithTimeout helper to avoid tests hanging on lock acquisition
  - Skip tests if the lock cannot be acquired within 30 seconds

* test: extract connectToMasterAndSync helper to reduce duplication

* test: fix captureCommandOutput pipe deadlock

  Close the write end of the pipe before calling io.ReadAll to signal EOF;
  otherwise ReadAll blocks forever waiting for more data.

* test: fix tryLockWithTimeout to check lock command errors

  Propagate the lock command error through the channel and treat the lock as
  acquired only if the command succeeded. Previously any completion
  (including errors) was treated as successful lock acquisition.
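The first fix above is a context-lifetime bug: a keep-alive goroutine tied to a context shorter than the test itself silently stops, and everything that depends on the connection then hangs. Below is a minimal, self-contained sketch of that failure mode; `keepConnected` is a hypothetical stand-in for `MasterClient.KeepConnectedToMaster`, and the 60s/180s timeouts are scaled down to seconds.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// keepConnected stands in for MasterClient.KeepConnectedToMaster: it keeps
// the connection alive until its context is cancelled.
func keepConnected(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			fmt.Println("keep-alive stopped early:", ctx.Err())
			return
		case <-time.After(100 * time.Millisecond):
			// heartbeat tick
		}
	}
}

func main() {
	// Outer "test" context, analogous to the 180s test timeout.
	testCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// Buggy pattern: a separate, shorter context for the keep-alive goroutine.
	// It expires while the test is still running, so later commands hang
	// waiting for a connection that will never come back.
	shortCtx, cancelShort := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancelShort()
	go keepConnected(shortCtx)

	// Fixed pattern: tie the keep-alive to the test's own context instead:
	//   go keepConnected(testCtx)

	<-testCtx.Done() // the "test" outlives the keep-alive by two seconds
	fmt.Println("test finished:", testCtx.Err())
}
```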
@@ -96,13 +96,12 @@ func TestECEncodingVolumeLocationTimingBug(t *testing.T) {
 	// This simulates the race condition where EC encoding updates master metadata
 	// but volume location collection happens after that update
 
-	// First acquire the lock (required for EC encode)
-	lockCmd := shell.Commands[findCommandIndex("lock")]
-	var lockOutput bytes.Buffer
-	err = lockCmd.Do([]string{}, commandEnv, &lockOutput)
-	if err != nil {
-		t.Logf("Lock command failed: %v", err)
+	// Try to get lock with timeout to avoid hanging
+	locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+	if !locked {
+		t.Skip("Could not acquire lock within timeout - master may not be ready")
 	}
+	defer unlock()
 
 	// Execute EC encoding - test the timing directly
 	var encodeOutput bytes.Buffer
@@ -651,6 +650,55 @@ type commandRunner interface {
 	Do([]string, *shell.CommandEnv, io.Writer) error
 }
 
+// tryLockWithTimeout attempts to acquire the shell lock with a timeout.
+// Returns true if lock was acquired, false if timeout or error occurred.
+// If lock was acquired, the caller must call the returned unlock function.
+func tryLockWithTimeout(t *testing.T, commandEnv *shell.CommandEnv, timeout time.Duration) (locked bool, unlock func()) {
+	t.Helper()
+
+	type lockResult struct {
+		err    error
+		output string
+	}
+
+	lockDone := make(chan lockResult, 1)
+	go func() {
+		lockCmd := shell.Commands[findCommandIndex("lock")]
+		var lockOutput bytes.Buffer
+		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
+		lockDone <- lockResult{err: err, output: lockOutput.String()}
+	}()
+
+	select {
+	case res := <-lockDone:
+		if res.err != nil {
+			t.Logf("lock command failed: %v, output: %s", res.err, res.output)
+			return false, nil
+		}
+		unlockCmd := shell.Commands[findCommandIndex("unlock")]
+		return true, func() {
+			var unlockOutput bytes.Buffer
+			_ = unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		}
+	case <-time.After(timeout):
+		t.Logf("timed out acquiring lock after %s", timeout)
+		return false, nil
+	}
+}
+
+// connectToMasterAndSync connects the command environment to the master and waits for sync.
+// This helper reduces code duplication across test functions.
+func connectToMasterAndSync(ctx context.Context, t *testing.T, commandEnv *shell.CommandEnv) {
+	t.Helper()
+	// Connect to master - use the main test context to avoid early disconnection
+	go commandEnv.MasterClient.KeepConnectedToMaster(ctx)
+	commandEnv.MasterClient.WaitUntilConnected(ctx)
+
+	// Wait for master client to fully sync
+	t.Log("Waiting for master client to sync...")
+	time.Sleep(5 * time.Second)
+}
+
 // captureCommandOutput executes a shell command and captures its output from both
 // stdout/stderr and the command's buffer. This reduces code duplication in tests.
 func captureCommandOutput(t *testing.T, cmd commandRunner, args []string, commandEnv *shell.CommandEnv) (output string, err error) {
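Two details of the helper above carry the actual fix. `lockDone` is buffered with capacity 1, so when the 30-second timeout wins the `select`, the goroutine running the lock command can still deliver its result and exit rather than blocking forever on the send. And the receive branch inspects `res.err` before reporting success: only a lock command that completed without error counts as holding the lock, which is the only case where an unlock closure is returned for the caller to defer.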
@@ -661,17 +709,16 @@ func captureCommandOutput(t *testing.T, cmd commandRunner, args []string, comman
 	r, w, pipeErr := os.Pipe()
 	require.NoError(t, pipeErr)
 
-	defer func() {
-		_ = w.Close()
-		os.Stdout = oldStdout
-		os.Stderr = oldStderr
-	}()
-
 	os.Stdout = w
 	os.Stderr = w
 
 	cmdErr := cmd.Do(args, commandEnv, &outBuf)
 
+	// Close write end BEFORE reading to signal EOF to the reader
+	_ = w.Close()
+	os.Stdout = oldStdout
+	os.Stderr = oldStderr
+
 	capturedOutput, readErr := io.ReadAll(r)
 	_ = r.Close()
 	require.NoError(t, readErr)
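The deadlock fixed in this hunk is inherent to the `os.Pipe` capture pattern: `io.ReadAll` returns only at EOF, and the read end of a pipe reports EOF only once every handle on the write end is closed. A minimal sketch of the pattern in isolation (the stdout/stderr redirection from the test helper is omitted here):

```go
package main

import (
	"fmt"
	"io"
	"os"
)

func main() {
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}

	fmt.Fprint(w, "captured output")

	// Close the write end BEFORE io.ReadAll. Without this Close, the reader
	// never sees EOF and ReadAll blocks forever.
	_ = w.Close()

	data, readErr := io.ReadAll(r)
	_ = r.Close()
	if readErr != nil {
		panic(readErr)
	}
	fmt.Println(string(data)) // prints "captured output"
}
```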
@@ -753,14 +800,7 @@ func TestDiskAwareECRebalancing(t *testing.T) {
 	}
 	commandEnv := shell.NewCommandEnv(options)
 
-	// Connect to master with longer timeout
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 60*time.Second)
-	defer cancel2()
-	go commandEnv.MasterClient.KeepConnectedToMaster(ctx2)
-	commandEnv.MasterClient.WaitUntilConnected(ctx2)
-
-	// Wait for master client to fully sync
-	time.Sleep(5 * time.Second)
+	connectToMasterAndSync(ctx, t, commandEnv)
 
 	// Upload test data to create a volume - retry if volumes not ready
 	var volumeId needle.VolumeId
@@ -1194,14 +1234,7 @@ func TestECDiskTypeSupport(t *testing.T) {
 	}
 	commandEnv := shell.NewCommandEnv(options)
 
-	// Connect to master with longer timeout
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 60*time.Second)
-	defer cancel2()
-	go commandEnv.MasterClient.KeepConnectedToMaster(ctx2)
-	commandEnv.MasterClient.WaitUntilConnected(ctx2)
-
-	// Wait for master client to fully sync
-	time.Sleep(5 * time.Second)
+	connectToMasterAndSync(ctx, t, commandEnv)
 
 	// Upload test data to create a volume - retry if volumes not ready
 	var volumeId needle.VolumeId
@@ -1228,19 +1261,12 @@ func TestECDiskTypeSupport(t *testing.T) {
 	})
 
 	t.Run("ec_encode_with_ssd_disktype", func(t *testing.T) {
-		// Get lock first
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		// Defer unlock to ensure it's always released
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		// Execute EC encoding with SSD disk type
 		ecEncodeCmd := shell.Commands[findCommandIndex("ec.encode")]
@@ -1270,19 +1296,12 @@ func TestECDiskTypeSupport(t *testing.T) {
 	})
 
 	t.Run("ec_balance_with_ssd_disktype", func(t *testing.T) {
-		// Get lock first
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		// Defer unlock to ensure it's always released
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		// Execute EC balance with SSD disk type
 		ecBalanceCmd := shell.Commands[findCommandIndex("ec.balance")]
@@ -1325,19 +1344,12 @@ func TestECDiskTypeSupport(t *testing.T) {
 	})
 
 	t.Run("ec_encode_with_source_disktype", func(t *testing.T) {
-		// Test that -sourceDiskType flag is accepted
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		// Defer unlock to ensure it's always released
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		// Execute EC encoding with sourceDiskType filter
 		ecEncodeCmd := shell.Commands[findCommandIndex("ec.encode")]
@@ -1366,19 +1378,12 @@ func TestECDiskTypeSupport(t *testing.T) {
 	})
 
 	t.Run("ec_decode_with_disktype", func(t *testing.T) {
-		// Test that ec.decode accepts -diskType flag
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		// Defer unlock to ensure it's always released
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		// Execute EC decode with disk type
 		ecDecodeCmd := shell.Commands[findCommandIndex("ec.decode")]
@@ -1424,6 +1429,7 @@ func startClusterWithDiskType(ctx context.Context, dataDir string, diskType stri
 		"-mdir", masterDir,
 		"-volumeSizeLimitMB", "10",
 		"-ip", "127.0.0.1",
+		"-peers", "none",
 	)
 
 	masterLogFile, err := os.Create(filepath.Join(masterDir, "master.log"))
@@ -1569,14 +1575,7 @@ func TestECDiskTypeMixedCluster(t *testing.T) {
 	}
 	commandEnv := shell.NewCommandEnv(options)
 
-	// Connect to master with longer timeout
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 60*time.Second)
-	defer cancel2()
-	go commandEnv.MasterClient.KeepConnectedToMaster(ctx2)
-	commandEnv.MasterClient.WaitUntilConnected(ctx2)
-
-	// Wait for master client to fully sync
-	time.Sleep(5 * time.Second)
+	connectToMasterAndSync(ctx, t, commandEnv)
 
 	t.Run("upload_to_ssd_and_hdd", func(t *testing.T) {
 		// Upload to SSD
@@ -1611,19 +1610,12 @@ func TestECDiskTypeMixedCluster(t *testing.T) {
 	})
 
 	t.Run("ec_balance_targets_correct_disk_type", func(t *testing.T) {
-		// Get lock first
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		// Defer unlock to ensure it's always released
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		// Run ec.balance for SSD collection with -diskType=ssd
 		var ssdOutput bytes.Buffer
@@ -1669,6 +1661,7 @@ func startMixedDiskTypeCluster(ctx context.Context, dataDir string) (*MultiDiskC
 		"-mdir", masterDir,
 		"-volumeSizeLimitMB", "10",
 		"-ip", "127.0.0.1",
+		"-peers", "none",
 	)
 
 	masterLogFile, err := os.Create(filepath.Join(masterDir, "master.log"))
@@ -1770,12 +1763,7 @@ func TestEvacuationFallbackBehavior(t *testing.T) {
 	}
 	commandEnv := shell.NewCommandEnv(options)
 
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 60*time.Second)
-	defer cancel2()
-	go commandEnv.MasterClient.KeepConnectedToMaster(ctx2)
-	commandEnv.MasterClient.WaitUntilConnected(ctx2)
-
-	time.Sleep(5 * time.Second)
+	connectToMasterAndSync(ctx, t, commandEnv)
 
 	t.Run("fallback_when_same_disktype_full", func(t *testing.T) {
 		// This test verifies that when evacuating SSD EC shards from a server,
||||||
@@ -1800,18 +1788,12 @@ func TestEvacuationFallbackBehavior(t *testing.T) {
|
|||||||
|
|
||||||
time.Sleep(3 * time.Second)
|
time.Sleep(3 * time.Second)
|
||||||
|
|
||||||
// Get lock
|
// Try to get lock with timeout to avoid hanging
|
||||||
lockCmd := shell.Commands[findCommandIndex("lock")]
|
locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
|
||||||
var lockOutput bytes.Buffer
|
if !locked {
|
||||||
err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
|
t.Skip("Could not acquire lock within timeout - master may not be ready")
|
||||||
if err != nil {
|
|
||||||
t.Logf("Lock command failed: %v", err)
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
defer unlock()
|
||||||
unlockCmd := shell.Commands[findCommandIndex("unlock")]
|
|
||||||
var unlockOutput bytes.Buffer
|
|
||||||
defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
|
|
||||||
|
|
||||||
// EC encode the SSD volume
|
// EC encode the SSD volume
|
||||||
var encodeOutput bytes.Buffer
|
var encodeOutput bytes.Buffer
|
||||||
@@ -1875,12 +1857,7 @@ func TestCrossRackECPlacement(t *testing.T) {
 	}
 	commandEnv := shell.NewCommandEnv(options)
 
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 60*time.Second)
-	defer cancel2()
-	go commandEnv.MasterClient.KeepConnectedToMaster(ctx2)
-	commandEnv.MasterClient.WaitUntilConnected(ctx2)
-
-	time.Sleep(5 * time.Second)
+	connectToMasterAndSync(ctx, t, commandEnv)
 
 	// Upload test data
 	testData := []byte("Cross-rack EC placement test data - needs to be distributed across racks")
@@ -1899,18 +1876,12 @@ func TestCrossRackECPlacement(t *testing.T) {
 	time.Sleep(3 * time.Second)
 
 	t.Run("ec_encode_cross_rack", func(t *testing.T) {
-		// Get lock
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		// EC encode with rack-aware placement
 		// Note: uploadTestDataToMaster uses collection "test" by default
@@ -1955,18 +1926,12 @@ func TestCrossRackECPlacement(t *testing.T) {
 	})
 
 	t.Run("ec_balance_respects_rack_placement", func(t *testing.T) {
-		// Get lock
-		lockCmd := shell.Commands[findCommandIndex("lock")]
-		var lockOutput bytes.Buffer
-		err := lockCmd.Do([]string{}, commandEnv, &lockOutput)
-		if err != nil {
-			t.Logf("Lock command failed: %v", err)
-			return
+		// Try to get lock with timeout to avoid hanging
+		locked, unlock := tryLockWithTimeout(t, commandEnv, 30*time.Second)
+		if !locked {
+			t.Skip("Could not acquire lock within timeout - master may not be ready")
 		}
-		unlockCmd := shell.Commands[findCommandIndex("unlock")]
-		var unlockOutput bytes.Buffer
-		defer unlockCmd.Do([]string{}, commandEnv, &unlockOutput)
+		defer unlock()
 
 		initialDistribution := countShardsPerRack(testDir, uint32(volumeId))
 		t.Logf("Initial rack distribution: %v", initialDistribution)
@@ -2014,6 +1979,7 @@ func startLimitedSsdCluster(ctx context.Context, dataDir string) (*MultiDiskClus
 		"-mdir", masterDir,
 		"-volumeSizeLimitMB", "10",
 		"-ip", "127.0.0.1",
+		"-peers", "none",
 	)
 
 	masterLogFile, err := os.Create(filepath.Join(masterDir, "master.log"))
@@ -2104,6 +2070,7 @@ func startMultiRackCluster(ctx context.Context, dataDir string) (*MultiDiskClust
 		"-mdir", masterDir,
 		"-volumeSizeLimitMB", "10",
 		"-ip", "127.0.0.1",
+		"-peers", "none",
 	)
 
 	masterLogFile, err := os.Create(filepath.Join(masterDir, "master.log"))