Add volume server integration test suite and CI workflow (#8322)

* docs(volume_server): add integration test development plan

* test(volume_server): add integration harness and profile matrix

* test(volume_server/http): add admin and options integration coverage

* test(volume_server/grpc): add state and status integration coverage

* test(volume_server): auto-build weed binary and harden cluster startup

* test(volume_server/http): add upload read range head delete coverage

* test(volume_server/grpc): expand admin lifecycle and state coverage

* docs(volume_server): update progress tracker for implemented tests

* test(volume_server/http): cover if-none-match and invalid-range branches

* test(volume_server/grpc): add batch delete integration coverage

* docs(volume_server): log latest HTTP and gRPC test coverage

* ci(volume_server): run volume server integration tests in github actions

* test(volume_server/grpc): add needle status configure ping and leave coverage

* docs(volume_server): record additional grpc coverage progress

* test(volume_server/grpc): add vacuum integration coverage

* docs(volume_server): record vacuum test coverage progress

* test(volume_server/grpc): add read and write needle blob error-path coverage

* docs(volume_server): record data rw grpc coverage progress

* test(volume_server/http): add jwt auth integration coverage

* test(volume_server/grpc): add sync copy and stream error-path coverage

* docs(volume_server): record jwt and sync/copy test coverage

* test(volume_server/grpc): add scrub and query integration coverage

* test(volume_server/grpc): add volume tail sender and receiver coverage

* docs(volume_server): record scrub query and tail test progress

* test(volume_server/grpc): add readonly writable and collection lifecycle coverage

* test(volume_server/http): add public-port cors and method parity coverage

* test(volume_server/grpc): add blob meta and read-all success path coverage

* test(volume_server/grpc): expand scrub and query variation coverage

* test(volume_server/grpc): add tiering and remote fetch error-path coverage

* test(volume_server/http): add unchanged write and delete edge-case coverage

* test(volume_server/grpc): add ping unknown and unreachable target coverage

* test(volume_server/grpc): add volume delete only-empty variation coverage

* test(volume_server/http): add jwt fid-mismatch auth coverage

* test(volume_server/grpc): add scrub ec auto-select empty coverage

* test(volume_server/grpc): stabilize ping timestamp assertion

* docs(volume_server): update integration coverage progress log

* test(volume_server/grpc): add tier remote backend and config variation coverage

* docs(volume_server): record tier remote variation progress

* test(volume_server/grpc): add incremental copy and receive-file protocol coverage

* test(volume_server/http): add read path shape and if-modified-since coverage

* test(volume_server/grpc): add copy-file compaction and receive-file success coverage

* test(volume_server/http): add passthrough headers and static asset coverage

* test(volume_server/grpc): add ping filer unreachable coverage

* docs(volume_server): record copy receive and http variant progress

* test(volume_server/grpc): add erasure coding maintenance and missing-path coverage

* docs(volume_server): record initial erasure coding rpc coverage

* test(volume_server/http): add multi-range multipart response coverage

* docs(volume_server): record multi-range http coverage progress

* test(volume_server/grpc): add query empty-stripe no-match coverage

* docs(volume_server): record query no-match stream behavior coverage

* test(volume_server/http): add upload throttling timeout and replicate bypass coverage

* docs(volume_server): record upload throttling coverage progress

* test(volume_server/http): add download throttling timeout coverage

* docs(volume_server): record download throttling coverage progress

* test(volume_server/http): add jwt wrong-cookie fid mismatch coverage

* docs(volume_server): record jwt wrong-cookie mismatch coverage

* test(volume_server/http): add jwt expired-token rejection coverage

* docs(volume_server): record jwt expired-token coverage

* test(volume_server/http): add jwt query and cookie transport coverage

* docs(volume_server): record jwt token transport coverage

* test(volume_server/http): add jwt token-source precedence coverage

* docs(volume_server): record jwt token-source precedence coverage

* test(volume_server/http): add jwt header-over-cookie precedence coverage

* docs(volume_server): record jwt header cookie precedence coverage

* test(volume_server/http): add jwt query-over-cookie precedence coverage

* docs(volume_server): record jwt query cookie precedence coverage

* test(volume_server/grpc): add setstate version mismatch and nil-state coverage

* docs(volume_server): record setstate validation coverage

* test(volume_server/grpc): add readonly persist-true lifecycle coverage

* docs(volume_server): record readonly persist variation coverage

* test(volume_server/http): add options origin cors header coverage

* docs(volume_server): record options origin cors coverage

* test(volume_server/http): add trace unsupported-method parity coverage

* docs(volume_server): record trace method parity coverage

* test(volume_server/grpc): add batch delete cookie-check variation coverage

* docs(volume_server): record batch delete cookie-check coverage

* test(volume_server/grpc): add admin lifecycle missing and maintenance variants

* docs(volume_server): record admin lifecycle edge-case coverage

* test(volume_server/grpc): add mixed batch delete status matrix coverage

* docs(volume_server): record mixed batch delete matrix coverage

* test(volume_server/http): add jwt-profile ui access gating coverage

* docs(volume_server): record jwt ui-gating http coverage

* test(volume_server/http): add propfind unsupported-method parity coverage

* docs(volume_server): record propfind method parity coverage

* test(volume_server/grpc): add volume configure success and rollback-path coverage

* docs(volume_server): record volume configure branch coverage

* test(volume_server/grpc): add volume needle status missing-path coverage

* docs(volume_server): record volume needle status error-path coverage

* test(volume_server/http): add readDeleted query behavior coverage

* docs(volume_server): record readDeleted http behavior coverage

* test(volume_server/http): add delete ts override parity coverage

* docs(volume_server): record delete ts parity coverage

* test(volume_server/grpc): add invalid blob/meta offset coverage

* docs(volume_server): record invalid blob/meta offset coverage

* test(volume_server/grpc): add read-all mixed volume abort coverage

* docs(volume_server): record read-all mixed-volume abort coverage

* test(volume_server/http): assert head response body parity

* docs(volume_server): record head body parity assertion

* test(volume_server/grpc): assert status state and memory payload completeness

* docs(volume_server): record volume server status payload coverage

* test(volume_server/grpc): add batch delete chunk-manifest rejection coverage

* docs(volume_server): record batch delete chunk-manifest coverage

* test(volume_server/grpc): add query cookie-mismatch eof parity coverage

* docs(volume_server): record query cookie-mismatch parity coverage

* test(volume_server/grpc): add ping master success target coverage

* docs(volume_server): record ping master success coverage

* test(volume_server/http): add head if-none-match conditional parity

* docs(volume_server): record head if-none-match parity coverage

* test(volume_server/http): add head if-modified-since parity coverage

* docs(volume_server): record head if-modified-since parity coverage

* test(volume_server/http): add connect unsupported-method parity coverage

* docs(volume_server): record connect method parity coverage

* test(volume_server/http): assert options allow-headers cors parity

* docs(volume_server): record options allow-headers coverage

* test(volume_server/framework): add dual volume cluster integration harness

* test(volume_server/http): add missing-local read mode proxy redirect local coverage

* docs(volume_server): record read mode missing-local matrix coverage

* test(volume_server/http): add download over-limit replica proxy fallback coverage

* docs(volume_server): record download replica fallback coverage

* test(volume_server/http): add missing-local readDeleted proxy redirect parity coverage

* docs(volume_server): record missing-local readDeleted mode coverage

* test(volume_server/framework): add single-volume cluster with filer harness

* test(volume_server/grpc): add ping filer success target coverage

* docs(volume_server): record ping filer success coverage

* test(volume_server/http): add proxied-loop guard download timeout coverage

* docs(volume_server): record proxied-loop download coverage

* test(volume_server/http): add disabled upload and download limit coverage

* docs(volume_server): record disabled throttling path coverage

* test(volume_server/grpc): add idempotent volume server leave coverage

* docs(volume_server): record leave idempotence coverage

* test(volume_server/http): add redirect collection query preservation coverage

* docs(volume_server): record redirect collection query coverage

* test(volume_server/http): assert admin server headers on status and health

* docs(volume_server): record admin server header coverage

* test(volume_server/http): assert healthz request-id echo parity

* docs(volume_server): record healthz request-id parity coverage

* test(volume_server/http): add over-limit invalid-vid download branch coverage

* docs(volume_server): record over-limit invalid-vid branch coverage

* test(volume_server/http): add public-port static asset coverage

* docs(volume_server): record public static endpoint coverage

* test(volume_server/http): add public head method parity coverage

* docs(volume_server): record public head parity coverage

* test(volume_server/http): add throttling wait-then-proceed path coverage

* docs(volume_server): record throttling wait-then-proceed coverage

* test(volume_server/http): add read cookie-mismatch not-found coverage

* docs(volume_server): record read cookie-mismatch coverage

* test(volume_server/http): add throttling timeout-recovery coverage

* docs(volume_server): record throttling timeout-recovery coverage

* test(volume_server/grpc): add ec generate mount info unmount lifecycle coverage

* docs(volume_server): record ec positive lifecycle coverage

* test(volume_server/grpc): add ec shard read and blob delete lifecycle coverage

* docs(volume_server): record ec shard read/blob delete lifecycle coverage

* test(volume_server/grpc): add ec rebuild and to-volume error branch coverage

* docs(volume_server): record ec rebuild and to-volume branch coverage

* test(volume_server/grpc): add ec shards-to-volume success roundtrip coverage

* docs(volume_server): record ec shards-to-volume success coverage

* test(volume_server/grpc): add ec receive and copy-file missing-source coverage

* docs(volume_server): record ec receive and copy-file coverage

* test(volume_server/grpc): add ec last-shard delete cleanup coverage

* docs(volume_server): record ec last-shard delete cleanup coverage

* test(volume_server/grpc): add volume copy success path coverage

* docs(volume_server): record volume copy success coverage

* test(volume_server/grpc): add volume copy overwrite-destination coverage

* docs(volume_server): record volume copy overwrite coverage

* test(volume_server/http): add write error-path variant coverage

* docs(volume_server): record http write error-path coverage

* test(volume_server/http): add conditional header precedence coverage

* docs(volume_server): record conditional header precedence coverage

* test(volume_server/http): add oversized combined range guard coverage

* docs(volume_server): record oversized range guard coverage

* test(volume_server/http): add image resize and crop read coverage

* docs(volume_server): record image transform coverage

* test(volume_server/http): add chunk-manifest expansion and bypass coverage

* docs(volume_server): record chunk-manifest read coverage

* test(volume_server/http): add compressed read encoding matrix coverage

* docs(volume_server): record compressed read matrix coverage

* test(volume_server/grpc): add tail receiver source replication coverage

* docs(volume_server): record tail receiver replication coverage

* test(volume_server/grpc): add tail sender large-needle chunking coverage

* docs(volume_server): record tail sender chunking coverage

* test(volume_server/grpc): add ec-backed volume needle status coverage

* docs(volume_server): record ec-backed needle status coverage

* test(volume_server/grpc): add ec shard copy from peer success coverage

* docs(volume_server): record ec shard copy success coverage

* test(volume_server/http): add chunk-manifest delete child cleanup coverage

* docs(volume_server): record chunk-manifest delete cleanup coverage

* test(volume_server/http): add chunk-manifest delete failure-path coverage

* docs(volume_server): record chunk-manifest delete failure coverage

* test(volume_server/grpc): add ec shard copy source-unavailable coverage

* docs(volume_server): record ec shard copy source-unavailable coverage

* parallel
This commit is contained in:
Chris Lu
2026-02-13 00:40:56 -08:00
committed by GitHub
parent c433fee36a
commit beeb375a88
41 changed files with 9459 additions and 0 deletions

View File

@@ -0,0 +1,264 @@
package volume_server_grpc_test
import (
"bytes"
"context"
"net/http"
"strings"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/test/volume_server/framework"
"github.com/seaweedfs/seaweedfs/test/volume_server/matrix"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
)
// TestBatchDeleteInvalidFidAndMaintenanceMode checks two BatchDelete paths on a
// single-volume cluster:
//
//   - a malformed file id is expected to be reported as a single per-file
//     result with status 400 rather than as an RPC error;
//   - after the server state is switched to maintenance via SetState, the
//     BatchDelete RPC itself is expected to fail with an error mentioning
//     "maintenance mode".
//
// The test is skipped when run with -short.
func TestBatchDeleteInvalidFidAndMaintenanceMode(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	cluster := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, cluster.VolumeGRPCAddress())
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Phase 1: the malformed fid must surface inside the response payload.
	invalidReq := &volume_server_pb.BatchDeleteRequest{FileIds: []string{"bad-fid"}}
	invalidResp, err := client.BatchDelete(ctx, invalidReq)
	if err != nil {
		t.Fatalf("BatchDelete invalid fid should return response, got error: %v", err)
	}
	switch results := invalidResp.GetResults(); {
	case len(results) != 1:
		t.Fatalf("expected one batch delete result, got %d", len(results))
	case results[0].GetStatus() != 400:
		t.Fatalf("invalid fid expected status 400, got %d", results[0].GetStatus())
	}

	// Phase 2: enable maintenance mode, reusing the current state version in
	// the SetState request.
	current, err := client.GetState(ctx, &volume_server_pb.GetStateRequest{})
	if err != nil {
		t.Fatalf("GetState failed: %v", err)
	}
	maintenanceState := &volume_server_pb.VolumeServerState{
		Maintenance: true,
		Version:     current.GetState().GetVersion(),
	}
	if _, err = client.SetState(ctx, &volume_server_pb.SetStateRequest{State: maintenanceState}); err != nil {
		t.Fatalf("SetState maintenance=true failed: %v", err)
	}

	// With maintenance enabled the whole RPC is expected to be rejected.
	_, err = client.BatchDelete(ctx, &volume_server_pb.BatchDeleteRequest{FileIds: []string{"1,1234567890ab"}})
	switch {
	case err == nil:
		t.Fatalf("BatchDelete should fail when maintenance mode is enabled")
	case !strings.Contains(err.Error(), "maintenance mode"):
		t.Fatalf("expected maintenance mode error, got: %v", err)
	}
}
// TestBatchDeleteCookieMismatchAndSkipCheck uploads one needle and then issues
// BatchDelete twice with a file id that carries a different cookie:
//
//   - with SkipCookieCheck=false the single result is expected to be 400;
//   - with SkipCookieCheck=true the single result is expected to be 202, and a
//     subsequent HTTP read of the correctly-cookied fid is expected to be 404.
//
// The test is skipped when run with -short.
func TestBatchDeleteCookieMismatchAndSkipCheck(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	const (
		volumeID      = uint32(31)
		needleID      = uint64(900001)
		correctCookie = uint32(0x1122AABB)
		wrongCookie   = uint32(0x1122AABC)
	)

	cluster := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, cluster.VolumeGRPCAddress())
	defer conn.Close()

	framework.AllocateVolume(t, client, volumeID, "")

	// Seed the volume with a needle stored under the correct cookie.
	httpClient := framework.NewHTTPClient()
	fid := framework.NewFileID(volumeID, needleID, correctCookie)
	seeded := framework.UploadBytes(t, httpClient, cluster.VolumeAdminURL(), fid, []byte("batch-delete-cookie-check"))
	_ = framework.ReadAllAndClose(t, seeded)
	if seeded.StatusCode != http.StatusCreated {
		t.Fatalf("upload expected 201, got %d", seeded.StatusCode)
	}

	// Same volume and needle id, but a cookie that does not match the upload.
	wrongCookieFid := framework.NewFileID(volumeID, needleID, wrongCookie)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Cookie check enabled: the mismatch is expected as a per-file 400.
	checkedReq := &volume_server_pb.BatchDeleteRequest{
		FileIds:         []string{wrongCookieFid},
		SkipCookieCheck: false,
	}
	checkedResp, err := client.BatchDelete(ctx, checkedReq)
	if err != nil {
		t.Fatalf("BatchDelete with cookie check failed: %v", err)
	}
	switch results := checkedResp.GetResults(); {
	case len(results) != 1:
		t.Fatalf("BatchDelete cookie mismatch expected 1 result, got %d", len(results))
	case results[0].GetStatus() != http.StatusBadRequest:
		t.Fatalf("BatchDelete cookie mismatch expected status 400, got %d", results[0].GetStatus())
	}

	// Cookie check skipped: the same fid is expected to be accepted.
	uncheckedReq := &volume_server_pb.BatchDeleteRequest{
		FileIds:         []string{wrongCookieFid},
		SkipCookieCheck: true,
	}
	uncheckedResp, err := client.BatchDelete(ctx, uncheckedReq)
	if err != nil {
		t.Fatalf("BatchDelete skip cookie check failed: %v", err)
	}
	switch results := uncheckedResp.GetResults(); {
	case len(results) != 1:
		t.Fatalf("BatchDelete skip check expected 1 result, got %d", len(results))
	case results[0].GetStatus() != http.StatusAccepted:
		t.Fatalf("BatchDelete skip check expected status 202, got %d", results[0].GetStatus())
	}

	// The needle must no longer be readable under its original fid.
	afterDelete := framework.ReadBytes(t, httpClient, cluster.VolumeAdminURL(), fid)
	_ = framework.ReadAllAndClose(t, afterDelete)
	if afterDelete.StatusCode != http.StatusNotFound {
		t.Fatalf("read after skip-check batch delete expected 404, got %d", afterDelete.StatusCode)
	}
}
// TestBatchDeleteMixedStatusesAndMismatchStopsProcessing uploads three needles
// (A, B, C) and verifies two BatchDelete behaviors:
//
//   - a batch of {malformed fid, fidA, never-uploaded fid} is expected to yield
//     three results with statuses 400, 202 and 404 in that order, after which
//     fidA is expected to read back as 404;
//   - a batch of {fidB with a wrong cookie, fidC} is expected to yield a single
//     400 result, with both fidB and fidC still readable (200) afterwards.
//
// The test is skipped when run with -short.
func TestBatchDeleteMixedStatusesAndMismatchStopsProcessing(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	const (
		volumeID = uint32(32)

		needleA = uint64(910001)
		needleB = uint64(910002)
		needleC = uint64(910003)

		cookieA = uint32(0x11111111)
		cookieB = uint32(0x22222222)
		cookieC = uint32(0x33333333)
	)

	cluster := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, cluster.VolumeGRPCAddress())
	defer conn.Close()

	framework.AllocateVolume(t, client, volumeID, "")

	httpClient := framework.NewHTTPClient()
	fidA := framework.NewFileID(volumeID, needleA, cookieA)
	fidB := framework.NewFileID(volumeID, needleB, cookieB)
	fidC := framework.NewFileID(volumeID, needleC, cookieC)

	// mustUpload stores body under fid through the admin HTTP endpoint and
	// aborts the test unless the server answers 201.
	mustUpload := func(fid, body string) {
		resp := framework.UploadBytes(t, httpClient, cluster.VolumeAdminURL(), fid, []byte(body))
		_ = framework.ReadAllAndClose(t, resp)
		if resp.StatusCode != http.StatusCreated {
			t.Fatalf("upload %s expected 201, got %d", fid, resp.StatusCode)
		}
	}
	mustUpload(fidA, "batch-delete-mixed-a")
	mustUpload(fidB, "batch-delete-mixed-b")
	mustUpload(fidC, "batch-delete-mixed-c")

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Scenario 1: one malformed fid, one existing needle, one fid that was
	// never uploaded by this test.
	missingFid := framework.NewFileID(volumeID, 919999, 0x44444444)
	mixedReq := &volume_server_pb.BatchDeleteRequest{
		FileIds: []string{"bad-fid", fidA, missingFid},
	}
	mixedResp, err := client.BatchDelete(ctx, mixedReq)
	if err != nil {
		t.Fatalf("BatchDelete mixed status request failed: %v", err)
	}
	switch results := mixedResp.GetResults(); {
	case len(results) != 3:
		t.Fatalf("BatchDelete mixed status expected 3 results, got %d", len(results))
	case results[0].GetStatus() != http.StatusBadRequest:
		t.Fatalf("BatchDelete mixed result[0] expected 400, got %d", results[0].GetStatus())
	case results[1].GetStatus() != http.StatusAccepted:
		t.Fatalf("BatchDelete mixed result[1] expected 202, got %d", results[1].GetStatus())
	case results[2].GetStatus() != http.StatusNotFound:
		t.Fatalf("BatchDelete mixed result[2] expected 404, got %d", results[2].GetStatus())
	}

	goneA := framework.ReadBytes(t, httpClient, cluster.VolumeAdminURL(), fidA)
	_ = framework.ReadAllAndClose(t, goneA)
	if goneA.StatusCode != http.StatusNotFound {
		t.Fatalf("fidA should be deleted after batch delete, got status %d", goneA.StatusCode)
	}

	// Scenario 2: a cookie mismatch on the first entry is expected to end the
	// batch, so fidC (listed second) gets no result and is not deleted.
	wrongCookieB := framework.NewFileID(volumeID, needleB, cookieB+1)
	stopReq := &volume_server_pb.BatchDeleteRequest{
		FileIds: []string{wrongCookieB, fidC},
	}
	stopResp, err := client.BatchDelete(ctx, stopReq)
	if err != nil {
		t.Fatalf("BatchDelete mismatch-stop request failed: %v", err)
	}
	switch results := stopResp.GetResults(); {
	case len(results) != 1:
		t.Fatalf("BatchDelete mismatch-stop expected 1 result due early break, got %d", len(results))
	case results[0].GetStatus() != http.StatusBadRequest:
		t.Fatalf("BatchDelete mismatch-stop expected 400, got %d", results[0].GetStatus())
	}

	keptB := framework.ReadBytes(t, httpClient, cluster.VolumeAdminURL(), fidB)
	_ = framework.ReadAllAndClose(t, keptB)
	if keptB.StatusCode != http.StatusOK {
		t.Fatalf("fidB should remain after cookie mismatch path, got %d", keptB.StatusCode)
	}

	keptC := framework.ReadBytes(t, httpClient, cluster.VolumeAdminURL(), fidC)
	_ = framework.ReadAllAndClose(t, keptC)
	if keptC.StatusCode != http.StatusOK {
		t.Fatalf("fidC should remain when batch processing stops on mismatch, got %d", keptC.StatusCode)
	}
}
// TestBatchDeleteRejectsChunkManifestNeedles uploads a needle with the
// "cm=true" query flag and then asks BatchDelete to remove it. The RPC is
// expected to succeed at the gRPC level while reporting a single result with
// status 406 and an error mentioning "ChunkManifest"; the needle is expected to
// remain readable (200) afterwards.
//
// The test is skipped when run with -short.
func TestBatchDeleteRejectsChunkManifestNeedles(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	const volumeID = uint32(33)

	cluster := framework.StartSingleVolumeCluster(t, matrix.P1())
	conn, client := framework.DialVolumeServer(t, cluster.VolumeGRPCAddress())
	defer conn.Close()

	framework.AllocateVolume(t, client, volumeID, "")

	httpClient := framework.NewHTTPClient()
	fid := framework.NewFileID(volumeID, 920001, 0x5555AAAA)

	// Upload via a hand-built POST so the cm=true flag can be attached to the
	// request URL.
	payload := []byte("manifest-placeholder-payload")
	uploadURL := cluster.VolumeAdminURL() + "/" + fid + "?cm=true"
	uploadReq, err := http.NewRequest(http.MethodPost, uploadURL, bytes.NewReader(payload))
	if err != nil {
		t.Fatalf("create chunk manifest upload request: %v", err)
	}
	uploadReq.Header.Set("Content-Type", "application/octet-stream")

	uploaded := framework.DoRequest(t, httpClient, uploadReq)
	_ = framework.ReadAllAndClose(t, uploaded)
	if uploaded.StatusCode != http.StatusCreated {
		t.Fatalf("chunk manifest upload expected 201, got %d", uploaded.StatusCode)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// The rejection is expected inside the response payload, not as an RPC error.
	deleteResp, err := client.BatchDelete(ctx, &volume_server_pb.BatchDeleteRequest{FileIds: []string{fid}})
	if err != nil {
		t.Fatalf("BatchDelete chunk manifest should return response, got grpc error: %v", err)
	}
	switch results := deleteResp.GetResults(); {
	case len(results) != 1:
		t.Fatalf("BatchDelete chunk manifest expected one result, got %d", len(results))
	case results[0].GetStatus() != http.StatusNotAcceptable:
		t.Fatalf("BatchDelete chunk manifest expected status 406, got %d", results[0].GetStatus())
	case !strings.Contains(results[0].GetError(), "ChunkManifest"):
		t.Fatalf("BatchDelete chunk manifest expected error mentioning ChunkManifest, got %q", results[0].GetError())
	}

	// The rejected needle must still be served.
	stillThere := framework.ReadBytes(t, httpClient, cluster.VolumeAdminURL(), fid)
	_ = framework.ReadAllAndClose(t, stillThere)
	if stillThere.StatusCode != http.StatusOK {
		t.Fatalf("chunk manifest should not be deleted by BatchDelete reject path, got %d", stillThere.StatusCode)
	}
}