test: restore coverage removed in PR #8360 (#8779)

* test: restore maintenance mode coverage in TestVolumeMarkReadonlyWritableErrorPaths

PR #8360 removed the maintenance mode assertions because the refactored
check ordering (volume lookup before maintenance check) caused the
original test to hit "not found" instead of "maintenance mode" — the
test used a non-existent volume ID.

Restore coverage by allocating a real volume, then verifying:
- existing volume in maintenance mode returns "maintenance mode"
- non-existent volume in maintenance mode still returns "not found"
  (validating the new check ordering)

* test: add coverage for ScrubVolume MarkBrokenVolumesReadonly flag

PR #8360 added the mark_broken_volumes_readonly field to ScrubVolumeRequest
but no tests exercised the new logic paths. Add three integration tests:

- HealthyVolume: flag is a no-op when scrub finds no broken volumes
- CorruptVolume: corrupted .idx triggers broken detection; without the
  flag the volume stays writable, with the flag it becomes read-only
- MaintenanceMode: makeVolumeReadonly fails under maintenance and
  ScrubVolume propagates the error via errors.Join

* refactor: extract CorruptIndexFile and EnableMaintenanceMode test helpers

Move duplicated idx corruption and maintenance mode setup into
framework.CorruptIndexFile() and framework.EnableMaintenanceMode()
helpers. Use defer for file close in the corruption helper.
This commit is contained in:
Chris Lu
2026-03-26 10:52:37 -07:00
committed by GitHub
parent e5cf2d2a19
commit aa12b51cbf
3 changed files with 208 additions and 11 deletions

View File

@@ -5,6 +5,8 @@ import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"testing"
"time"
@@ -45,6 +47,39 @@ func UploadBytes(t testing.TB, client *http.Client, volumeURL, fid string, data
return DoRequest(t, client, req)
}
// CorruptIndexFile appends garbage bytes to a volume's .idx file on disk so
// that CheckIndexFile detects a size mismatch during scrub.
func CorruptIndexFile(t testing.TB, baseDir string, volumeID uint32) {
t.Helper()
idxPath := filepath.Join(baseDir, "volume", fmt.Sprintf("%d.idx", volumeID))
f, err := os.OpenFile(idxPath, os.O_WRONLY|os.O_APPEND, 0644)
if err != nil {
t.Fatalf("open idx file for corruption: %v", err)
}
defer f.Close()
if _, err := f.Write([]byte{0xDE, 0xAD}); err != nil {
t.Fatalf("corrupt idx file: %v", err)
}
}
// EnableMaintenanceMode puts the volume server into maintenance mode.
// It reads the current state version first so the SetState call passes the
// server's optimistic-concurrency check.
func EnableMaintenanceMode(t testing.TB, ctx context.Context, client volume_server_pb.VolumeServerClient) {
	t.Helper()
	current, err := client.GetState(ctx, &volume_server_pb.GetStateRequest{})
	if err != nil {
		t.Fatalf("GetState failed: %v", err)
	}
	desired := &volume_server_pb.VolumeServerState{
		Maintenance: true,
		Version:     current.GetState().GetVersion(),
	}
	if _, err := client.SetState(ctx, &volume_server_pb.SetStateRequest{State: desired}); err != nil {
		t.Fatalf("SetState maintenance=true failed: %v", err)
	}
}
func ReadBytes(t testing.TB, client *http.Client, volumeURL, fid string) *http.Response {
t.Helper()

View File

@@ -97,9 +97,13 @@ func TestVolumeMarkReadonlyWritableErrorPaths(t *testing.T) {
conn, grpcClient := framework.DialVolumeServer(t, clusterHarness.VolumeGRPCAddress())
defer conn.Close()
const volumeID = uint32(75)
framework.AllocateVolume(t, grpcClient, volumeID, "")
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// non-existent volumes should return "not found"
_, err := grpcClient.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{VolumeId: 98771, Persist: true})
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Fatalf("VolumeMarkReadonly missing-volume error mismatch: %v", err)
@@ -110,18 +114,30 @@ func TestVolumeMarkReadonlyWritableErrorPaths(t *testing.T) {
t.Fatalf("VolumeMarkWritable missing-volume error mismatch: %v", err)
}
stateResp, err := grpcClient.GetState(ctx, &volume_server_pb.GetStateRequest{})
if err != nil {
t.Fatalf("GetState failed: %v", err)
// enter maintenance mode
framework.EnableMaintenanceMode(t, ctx, grpcClient)
// existing volume in maintenance mode should return "maintenance mode" error
_, err = grpcClient.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{VolumeId: volumeID, Persist: true})
if err == nil || !strings.Contains(err.Error(), "maintenance mode") {
t.Fatalf("VolumeMarkReadonly maintenance error mismatch: %v", err)
}
_, err = grpcClient.SetState(ctx, &volume_server_pb.SetStateRequest{
State: &volume_server_pb.VolumeServerState{
Maintenance: true,
Version: stateResp.GetState().GetVersion(),
},
})
if err != nil {
t.Fatalf("SetState maintenance=true failed: %v", err)
_, err = grpcClient.VolumeMarkWritable(ctx, &volume_server_pb.VolumeMarkWritableRequest{VolumeId: volumeID})
if err == nil || !strings.Contains(err.Error(), "maintenance mode") {
t.Fatalf("VolumeMarkWritable maintenance error mismatch: %v", err)
}
// non-existent volume in maintenance mode should still return "not found"
// (volume lookup happens before maintenance check)
_, err = grpcClient.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{VolumeId: 98773, Persist: true})
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Fatalf("VolumeMarkReadonly missing-volume in maintenance error mismatch: %v", err)
}
_, err = grpcClient.VolumeMarkWritable(ctx, &volume_server_pb.VolumeMarkWritableRequest{VolumeId: 98774})
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Fatalf("VolumeMarkWritable missing-volume in maintenance error mismatch: %v", err)
}
}

View File

@@ -385,3 +385,149 @@ func TestQueryCookieMismatchReturnsEOFNoResults(t *testing.T) {
t.Fatalf("Query cookie mismatch expected EOF with no streamed records, got: %v", err)
}
}
// TestScrubVolumeMarkBrokenReadonlyHealthyVolume verifies that the
// MarkBrokenVolumesReadonly flag is a no-op when a scrub finds nothing
// broken: no broken IDs are reported and the volume stays writable.
func TestScrubVolumeMarkBrokenReadonlyHealthyVolume(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	harness := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, harness.VolumeGRPCAddress())
	defer conn.Close()

	const volumeID = uint32(76)
	framework.AllocateVolume(t, client, volumeID, "")
	// Store one needle so the scrub has real data to verify.
	framework.UploadBytes(t, framework.NewHTTPClient(), harness.VolumeAdminURL(), framework.NewFileID(volumeID, 1, 1), []byte("healthy data"))

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	scrubResp, err := client.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{
		VolumeIds:                 []uint32{volumeID},
		Mode:                      volume_server_pb.VolumeScrubMode_INDEX,
		MarkBrokenVolumesReadonly: true,
	})
	if err != nil {
		t.Fatalf("ScrubVolume with MarkBrokenVolumesReadonly on healthy volume failed: %v", err)
	}
	if broken := scrubResp.GetBrokenVolumeIds(); len(broken) != 0 {
		t.Fatalf("expected no broken volumes, got %v", broken)
	}

	status, err := client.VolumeStatus(ctx, &volume_server_pb.VolumeStatusRequest{VolumeId: volumeID})
	if err != nil {
		t.Fatalf("VolumeStatus failed: %v", err)
	}
	if status.GetIsReadOnly() {
		t.Fatalf("healthy volume should not be read-only after scrub with MarkBrokenVolumesReadonly")
	}
}
// TestScrubVolumeMarkBrokenReadonlyCorruptVolume corrupts a volume's .idx
// file and checks both sides of the MarkBrokenVolumesReadonly flag:
// without it the broken volume is reported but stays writable; with it the
// volume is marked read-only and the scrub details say so.
func TestScrubVolumeMarkBrokenReadonlyCorruptVolume(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	harness := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, harness.VolumeGRPCAddress())
	defer conn.Close()

	const volumeID = uint32(77)
	framework.AllocateVolume(t, client, volumeID, "")
	framework.UploadBytes(t, framework.NewHTTPClient(), harness.VolumeAdminURL(), framework.NewFileID(volumeID, 1, 1), []byte("test data"))
	framework.CorruptIndexFile(t, harness.BaseDir(), volumeID)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Pass 1: flag unset — corruption is reported but the volume must stay
	// writable.
	withoutFlag, err := client.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{
		VolumeIds: []uint32{volumeID},
		Mode:      volume_server_pb.VolumeScrubMode_INDEX,
	})
	if err != nil {
		t.Fatalf("ScrubVolume without flag failed: %v", err)
	}
	if len(withoutFlag.GetBrokenVolumeIds()) == 0 {
		t.Fatalf("expected broken volume after corruption")
	}
	status, err := client.VolumeStatus(ctx, &volume_server_pb.VolumeStatusRequest{VolumeId: volumeID})
	if err != nil {
		t.Fatalf("VolumeStatus after scrub without flag failed: %v", err)
	}
	if status.GetIsReadOnly() {
		t.Fatalf("volume should not be read-only when MarkBrokenVolumesReadonly is false")
	}

	// Pass 2: flag set — the broken volume must be marked read-only and the
	// details must record the transition.
	withFlag, err := client.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{
		VolumeIds:                 []uint32{volumeID},
		Mode:                      volume_server_pb.VolumeScrubMode_INDEX,
		MarkBrokenVolumesReadonly: true,
	})
	if err != nil {
		t.Fatalf("ScrubVolume with MarkBrokenVolumesReadonly failed: %v", err)
	}
	if len(withFlag.GetBrokenVolumeIds()) == 0 {
		t.Fatalf("expected broken volume after corruption with flag")
	}
	sawReadonlyDetail := false
	for _, detail := range withFlag.GetDetails() {
		if strings.Contains(detail, "is now read-only") {
			sawReadonlyDetail = true
			break
		}
	}
	if !sawReadonlyDetail {
		t.Fatalf("expected 'is now read-only' in details, got: %v", withFlag.GetDetails())
	}
	status, err = client.VolumeStatus(ctx, &volume_server_pb.VolumeStatusRequest{VolumeId: volumeID})
	if err != nil {
		t.Fatalf("VolumeStatus after MarkBrokenVolumesReadonly scrub failed: %v", err)
	}
	if !status.GetIsReadOnly() {
		t.Fatalf("broken volume should be read-only after MarkBrokenVolumesReadonly scrub")
	}
}
// TestScrubVolumeMarkBrokenReadonlyInMaintenanceMode verifies that when the
// server is in maintenance mode, marking a broken volume read-only fails and
// ScrubVolume surfaces that failure to the caller.
func TestScrubVolumeMarkBrokenReadonlyInMaintenanceMode(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	harness := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, harness.VolumeGRPCAddress())
	defer conn.Close()

	const volumeID = uint32(78)
	framework.AllocateVolume(t, client, volumeID, "")
	framework.UploadBytes(t, framework.NewHTTPClient(), harness.VolumeAdminURL(), framework.NewFileID(volumeID, 1, 1), []byte("test data"))
	framework.CorruptIndexFile(t, harness.BaseDir(), volumeID)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	framework.EnableMaintenanceMode(t, ctx, client)

	// The read-only transition is rejected under maintenance mode, so the
	// scrub call itself must come back with a maintenance-mode error.
	_, err := client.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{
		VolumeIds:                 []uint32{volumeID},
		Mode:                      volume_server_pb.VolumeScrubMode_INDEX,
		MarkBrokenVolumesReadonly: true,
	})
	if err == nil || !strings.Contains(err.Error(), "maintenance mode") {
		t.Fatalf("ScrubVolume with MarkBrokenVolumesReadonly in maintenance mode error mismatch: %v", err)
	}
}