* fix(ec): gather shards from all disk locations before rebuild (#8631)
Fix "too few shards given" error during ec.rebuild on multi-disk volume
servers. The root cause has two parts:
1. VolumeEcShardsRebuild only looked at a single disk location for shard
files. On multi-disk servers, the existing local shards could sit on one
disk while copied shards landed on another, so the rebuild saw fewer
shards than were actually available.
2. VolumeEcShardsCopy had a DiskId condition (req.DiskId == 0 &&
len(vs.store.Locations) > 0) that was always true on a running server,
making the FindFreeLocation fallback dead code. Copies therefore always
went to Locations[0] regardless of where existing shards were (see the
sketch below).
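The shape of the old selection logic, paraphrased (simplified stand-in names, not the verbatim original):

// On a live server len(locations) > 0 always holds, and DiskId is 0
// unless a caller explicitly sets it, so the first branch always won.
func selectLegacyLocation(diskId uint32, locations []*storage.DiskLocation) *storage.DiskLocation {
	if diskId == 0 && len(locations) > 0 {
		return locations[0] // always taken
	}
	// Never reached in practice: the FindFreeLocation fallback was dead code.
	return nil
}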
Changes:
- VolumeEcShardsRebuild now finds the location with the most shards,
then gathers shard files from other locations via hard links (or
symlinks for cross-device) before rebuilding. Gathered files are
cleaned up after rebuild.
- VolumeEcShardsCopy now only uses Locations[DiskId] when DiskId > 0
(explicitly set). Otherwise, it prefers the location that already has
the EC volume, falling back to HDD then any free location.
- generateMissingEcFiles now logs shard counts and provides a clear
error message when not enough shards are found, instead of passing
through to the opaque reedsolomon "too few shards given" error.
* fix(ec): update test to match skip behavior for unrepairable volumes
The test expected an error for volumes with insufficient shards, but
commit 5acb4578a changed unrepairable volumes to be skipped with a log
message instead of returning an error. Update the test to verify the
skip behavior and log output (sketched below).
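A minimal sketch of the skip behavior the updated test verifies; the variable names, helpers, and log wording are illustrative, not the real code:

func rebuildRepairableVolumes(volumeIds []needle.VolumeId, dataShards int) {
	for _, vid := range volumeIds {
		shardCount := countAvailableShards(vid) // hypothetical helper
		if shardCount < dataShards {
			// Unrepairable: log and skip instead of returning an error.
			glog.V(0).Infof("skipping ec volume %d: %d shards available, need %d", vid, shardCount, dataShards)
			continue
		}
		rebuildOneVolume(vid) // hypothetical helper
	}
}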
* fix(ec): address PR review comments
- Add comment clarifying DiskId=0 means "not specified" (protobuf default),
callers must use DiskId >= 1 to target a specific disk.
- Log warnings on cleanup failures for gathered shard links.
* fix(ec): read shard files from other disks directly instead of linking
Replace the hard link / symlink gathering approach with passing
additional search directories into RebuildEcFiles. The rebuild
function now opens shard files directly from whichever disk they
live on, avoiding filesystem link operations and cleanup.
RebuildEcFiles and RebuildEcFilesWithContext gain a variadic
additionalDirs parameter, so existing callers keep compiling
unchanged (see the sketch below).
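A minimal sketch of how the variadic parameter keeps old call sites working; rebuildAllShards is a hypothetical wrapper, and the RebuildEcFiles calls mirror the handler code later on this page:

func rebuildAllShards(dataBaseFileName string, additionalDirs []string) ([]uint32, error) {
	if len(additionalDirs) == 0 {
		// The old single-disk call shape still compiles unchanged.
		return erasure_coding.RebuildEcFiles(dataBaseFileName)
	}
	// Multi-disk: also search the other disks' data directories for input shards.
	return erasure_coding.RebuildEcFiles(dataBaseFileName, additionalDirs...)
}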
* fix(ec): clarify DiskId selection semantics in VolumeEcShardsCopy comment
* fix(ec): avoid empty files on failed rebuild; don't skip ecx-only locations
- generateMissingEcFiles: use a two-pass approach. First discover
present/missing shards and check reconstructability; only then create
output files. This avoids leaving behind empty truncated shard files
when there are too few shards to rebuild (see the sketch after this list).
- VolumeEcShardsRebuild: compute hasEcx before skipping zero-shard locations.
A location with an .ecx file but no shard files (all shards on other disks)
is now a valid rebuild candidate instead of being silently skipped.
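A hypothetical two-pass shape of generateMissingEcFiles as described in the first bullet above; the names are illustrative stand-ins, while util.FileExists and erasure_coding.ToExt are the helpers used elsewhere on this page:

func generateMissingEcFilesSketch(baseFileName string, totalShards, dataShards int) error {
	// Pass 1: discover which shards are present without creating any files.
	var present, missing []int
	for i := 0; i < totalShards; i++ {
		if util.FileExists(baseFileName + erasure_coding.ToExt(i)) {
			present = append(present, i)
		} else {
			missing = append(missing, i)
		}
	}
	if len(present) < dataShards {
		// Fail before opening any outputs, so no empty truncated shard files are left behind.
		return fmt.Errorf("ec volume %s: found %d shards, need at least %d to rebuild", baseFileName, len(present), dataShards)
	}
	// Pass 2: only now create output files for the missing shards and decode into them.
	_ = missing // reed-solomon reconstruction elided
	return nil
}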
* fix(ec): select ecx-only location as rebuildLocation when none chosen yet
When rebuildLocation is nil and a location has hasEcx=true but
existingShardCount=0 (all shards on other disks), the comparison
existingShardCount > rebuildShardCount evaluated as 0 > 0, which is
false, so the location was never promoted to rebuildLocation. Add
rebuildLocation == nil to the predicate so the first location with
an .ecx file is always selected as a candidate (excerpted below).
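The fixed predicate, condensed from VolumeEcShardsRebuild in the file below:

if hasEcx && (rebuildLocation == nil || existingShardCount > rebuildShardCount) {
	// promote this location to rebuildLocation
}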
package weed_server

import (
	"context"
	"fmt"
	"io"
	"math"
	"os"
	"path"
	"strconv"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
	"github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
	"github.com/seaweedfs/seaweedfs/weed/util"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

/*

Steps to apply erasure coding to .dat .idx files

0. ensure the volume is readonly
1. client calls VolumeEcShardsGenerate to generate the .ecx and .ec00 ~ .ec13 files
2. client asks master for possible servers to hold the ec files
3. client calls VolumeEcShardsCopy on above target servers to copy ec files from the source server
4. target servers report the new ec files to the master
5. master stores vid -> [14]*DataNode
6. client checks master. If all 14 slices are ready, delete the original .dat, .idx files

*/

// VolumeEcShardsGenerate generates the .ecx and .ec00 ~ .ec13 files
func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_server_pb.VolumeEcShardsGenerateRequest) (*volume_server_pb.VolumeEcShardsGenerateResponse, error) {
	if err := vs.CheckMaintenanceMode(); err != nil {
		return nil, err
	}

	glog.V(0).Infof("VolumeEcShardsGenerate: %v", req)

	v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
	if v == nil {
		return nil, fmt.Errorf("volume %d not found", req.VolumeId)
	}
	baseFileName := v.DataFileName()

	if v.Collection != req.Collection {
		return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
	}

	// Create EC context - prefer existing .vif config if present (for regeneration scenarios)
	ecCtx := erasure_coding.NewDefaultECContext(req.Collection, needle.VolumeId(req.VolumeId))
	if volumeInfo, _, found, _ := volume_info.MaybeLoadVolumeInfo(baseFileName + ".vif"); found && volumeInfo.EcShardConfig != nil {
		ds := int(volumeInfo.EcShardConfig.DataShards)
		ps := int(volumeInfo.EcShardConfig.ParityShards)

		// Validate and use existing EC config
		if ds > 0 && ps > 0 && ds+ps <= erasure_coding.MaxShardCount {
			ecCtx.DataShards = ds
			ecCtx.ParityShards = ps
			glog.V(0).Infof("Using existing EC config for volume %d: %s", req.VolumeId, ecCtx.String())
		} else {
			glog.Warningf("Invalid EC config in .vif for volume %d (data=%d, parity=%d), using defaults", req.VolumeId, ds, ps)
		}
	} else {
		glog.V(0).Infof("Using default EC config for volume %d: %s", req.VolumeId, ecCtx.String())
	}

	shouldCleanup := true
	defer func() {
		if !shouldCleanup {
			return
		}
		for i := 0; i < ecCtx.Total(); i++ {
			os.Remove(baseFileName + ecCtx.ToExt(i))
		}
		os.Remove(v.IndexFileName() + ".ecx")
	}()

	// write .ec00 ~ .ec[TotalShards-1] files using context
	if err := erasure_coding.WriteEcFilesWithContext(baseFileName, ecCtx); err != nil {
		return nil, fmt.Errorf("WriteEcFilesWithContext %s: %v", baseFileName, err)
	}

	// write .ecx file
	if err := erasure_coding.WriteSortedFileFromIdx(v.IndexFileName(), ".ecx"); err != nil {
		return nil, fmt.Errorf("WriteSortedFileFromIdx %s: %v", v.IndexFileName(), err)
	}

	// write .vif file
	var expireAtSec uint64
	if v.Ttl != nil {
		ttlSecond := v.Ttl.ToSeconds()
		if ttlSecond > 0 {
			expireAtSec = uint64(time.Now().Unix()) + ttlSecond // calculated expiration time
		}
	}
	volumeInfo := &volume_server_pb.VolumeInfo{Version: uint32(v.Version())}
	volumeInfo.ExpireAtSec = expireAtSec

	datSize, _, _ := v.FileStat()
	volumeInfo.DatFileSize = int64(datSize)

	// Validate EC configuration before saving to .vif
	if ecCtx.DataShards <= 0 || ecCtx.ParityShards <= 0 || ecCtx.Total() > erasure_coding.MaxShardCount {
		return nil, fmt.Errorf("invalid EC config before saving: data=%d, parity=%d, total=%d (max=%d)",
			ecCtx.DataShards, ecCtx.ParityShards, ecCtx.Total(), erasure_coding.MaxShardCount)
	}

	// Save EC configuration to VolumeInfo
	volumeInfo.EcShardConfig = &volume_server_pb.EcShardConfig{
		DataShards:   uint32(ecCtx.DataShards),
		ParityShards: uint32(ecCtx.ParityShards),
	}
	glog.V(1).Infof("Saving EC config to .vif for volume %d: %d+%d (total: %d)",
		req.VolumeId, ecCtx.DataShards, ecCtx.ParityShards, ecCtx.Total())

	if err := volume_info.SaveVolumeInfo(baseFileName+".vif", volumeInfo); err != nil {
		return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err)
	}

	shouldCleanup = false

	return &volume_server_pb.VolumeEcShardsGenerateResponse{}, nil
}

// VolumeEcShardsRebuild generates any of the missing .ec00 ~ .ec13 files
func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_server_pb.VolumeEcShardsRebuildRequest) (*volume_server_pb.VolumeEcShardsRebuildResponse, error) {
	if err := vs.CheckMaintenanceMode(); err != nil {
		return nil, err
	}

	glog.V(0).Infof("VolumeEcShardsRebuild: %v", req)
	baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))

	var rebuiltShardIds []uint32

	// Find the rebuild location: the location with the most shards and an .ecx file.
	// With multi-disk servers, shards may be spread across different locations.
	var rebuildLocation *storage.DiskLocation
	var rebuildShardCount int
	var otherLocationsWithShards []*storage.DiskLocation

	for _, location := range vs.store.Locations {
		_, _, existingShardCount, err := checkEcVolumeStatus(baseFileName, location)
		if err != nil {
			return nil, err
		}

		indexBaseFileName := path.Join(location.IdxDirectory, baseFileName)
		if !util.FileExists(indexBaseFileName+".ecx") && location.IdxDirectory != location.Directory {
			indexBaseFileName = path.Join(location.Directory, baseFileName)
		}
		hasEcx := util.FileExists(indexBaseFileName + ".ecx")

		// Skip locations that have neither shard files nor an .ecx file.
		if existingShardCount == 0 && !hasEcx {
			continue
		}

		if hasEcx && (rebuildLocation == nil || existingShardCount > rebuildShardCount) {
			if rebuildLocation != nil {
				otherLocationsWithShards = append(otherLocationsWithShards, rebuildLocation)
			}
			rebuildLocation = location
			rebuildShardCount = existingShardCount
		} else {
			// Not the rebuild target (no .ecx here, or fewer shards than the
			// current candidate), but its shard files can still feed the rebuild.
			otherLocationsWithShards = append(otherLocationsWithShards, location)
		}
	}

	if rebuildLocation == nil {
		return &volume_server_pb.VolumeEcShardsRebuildResponse{}, nil
	}

	// Collect additional directories where shard files may exist.
	// On multi-disk servers, existing local shards may be on a different disk
	// than where copied shards were placed during ec.rebuild.
	rebuildDataDir := rebuildLocation.Directory
	var additionalDirs []string
	for _, otherLocation := range otherLocationsWithShards {
		additionalDirs = append(additionalDirs, otherLocation.Directory)
	}

	// Rebuild missing EC files, searching all disk locations for input shards
	dataBaseFileName := path.Join(rebuildDataDir, baseFileName)
	if generatedShardIds, err := erasure_coding.RebuildEcFiles(dataBaseFileName, additionalDirs...); err != nil {
		return nil, fmt.Errorf("RebuildEcFiles %s: %v", dataBaseFileName, err)
	} else {
		rebuiltShardIds = generatedShardIds
	}

	indexBaseFileName := path.Join(rebuildLocation.IdxDirectory, baseFileName)
	if !util.FileExists(indexBaseFileName+".ecx") && rebuildLocation.IdxDirectory != rebuildLocation.Directory {
		indexBaseFileName = path.Join(rebuildLocation.Directory, baseFileName)
	}
	if err := erasure_coding.RebuildEcxFile(indexBaseFileName); err != nil {
		return nil, fmt.Errorf("RebuildEcxFile %s: %v", indexBaseFileName, err)
	}

	return &volume_server_pb.VolumeEcShardsRebuildResponse{
		RebuiltShardIds: rebuiltShardIds,
	}, nil
}

// VolumeEcShardsCopy copies the .ecx and some ec data slices
func (vs *VolumeServer) VolumeEcShardsCopy(ctx context.Context, req *volume_server_pb.VolumeEcShardsCopyRequest) (*volume_server_pb.VolumeEcShardsCopyResponse, error) {
	if err := vs.CheckMaintenanceMode(); err != nil {
		return nil, err
	}

	glog.V(0).Infof("VolumeEcShardsCopy: %v", req)

	var location *storage.DiskLocation

	// Select the target location for storing EC shard files.
	//
	// When req.DiskId > 0 the caller is explicitly choosing a disk:
	//	location = vs.store.Locations[req.DiskId]
	// (DiskId=1 → Locations[1], DiskId=2 → Locations[2], etc.)
	//
	// When req.DiskId == 0 (the protobuf default, meaning "not specified")
	// we auto-select location by preferring the disk that already holds EC
	// shards for this volume, then falling back to any HDD, then any disk.
	//
	// Note: Locations[0] cannot be targeted explicitly via DiskId because 0
	// is indistinguishable from "unset". It can still be chosen by the
	// auto-select logic.
	if req.DiskId > 0 {
		// Validate disk ID is within bounds
		if int(req.DiskId) >= len(vs.store.Locations) {
			return nil, fmt.Errorf("invalid disk_id %d: only have %d disks", req.DiskId, len(vs.store.Locations))
		}

		// Use the specific disk location
		location = vs.store.Locations[req.DiskId]
		glog.V(1).Infof("Using disk %d for EC shard copy: %s", req.DiskId, location.Directory)
	} else {
		// Prefer a location that already has shards for this volume,
		// so all shards end up on the same disk for rebuild.
		location = vs.store.FindFreeLocation(func(loc *storage.DiskLocation) bool {
			_, found := loc.FindEcVolume(needle.VolumeId(req.VolumeId))
			return found
		})
		if location == nil {
			// Fall back to any HDD location with free space
			location = vs.store.FindFreeLocation(func(loc *storage.DiskLocation) bool {
				return loc.DiskType == types.HardDriveType
			})
		}
		if location == nil {
			// Fall back to any location with free space
			location = vs.store.FindFreeLocation(func(loc *storage.DiskLocation) bool {
				return true
			})
		}
		if location == nil {
			return nil, fmt.Errorf("no space left")
		}
	}

	dataBaseFileName := storage.VolumeFileName(location.Directory, req.Collection, int(req.VolumeId))
	indexBaseFileName := storage.VolumeFileName(location.IdxDirectory, req.Collection, int(req.VolumeId))

	err := operation.WithVolumeServerClient(true, pb.ServerAddress(req.SourceDataNode), vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {

		// copy ec data slices
		for _, shardId := range req.ShardIds {
			if _, err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, dataBaseFileName, erasure_coding.ToExt(int(shardId)), false, false, nil); err != nil {
				return err
			}
		}

		if req.CopyEcxFile {

			// copy ecx file
			if _, err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, indexBaseFileName, ".ecx", false, false, nil); err != nil {
				return err
			}
		}

		if req.CopyEcjFile {
			// copy ecj file
			if _, err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, indexBaseFileName, ".ecj", true, true, nil); err != nil {
				return err
			}
		}

		if req.CopyVifFile {
			// copy vif file
			if _, err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, dataBaseFileName, ".vif", false, true, nil); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("VolumeEcShardsCopy volume %d: %v", req.VolumeId, err)
	}

	return &volume_server_pb.VolumeEcShardsCopyResponse{}, nil
}

// VolumeEcShardsDelete locally deletes the .ecx and some ec data slices if no longer needed.
// The shards should not be mounted before calling this.
func (vs *VolumeServer) VolumeEcShardsDelete(ctx context.Context, req *volume_server_pb.VolumeEcShardsDeleteRequest) (*volume_server_pb.VolumeEcShardsDeleteResponse, error) {
	if err := vs.CheckMaintenanceMode(); err != nil {
		return nil, err
	}

	bName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))

	glog.V(0).Infof("ec volume %s shard delete %v", bName, req.ShardIds)

	for _, location := range vs.store.Locations {
		if err := deleteEcShardIdsForEachLocation(bName, location, req.ShardIds); err != nil {
			glog.Errorf("deleteEcShards from %s %s.%v: %v", location.Directory, bName, req.ShardIds, err)
			return nil, err
		}
	}

	return &volume_server_pb.VolumeEcShardsDeleteResponse{}, nil
}

// deleteEcShardIdsForEachLocation removes the given shard files from one disk
// location, and cleans up the .ecx/.ecj (and possibly .vif) files once no
// shard files remain there.
func deleteEcShardIdsForEachLocation(bName string, location *storage.DiskLocation, shardIds []uint32) error {

	found := false

	indexBaseFilename := path.Join(location.IdxDirectory, bName)
	dataBaseFilename := path.Join(location.Directory, bName)

	ecxExists := util.FileExists(path.Join(location.IdxDirectory, bName+".ecx"))
	if !ecxExists && location.IdxDirectory != location.Directory {
		ecxExists = util.FileExists(path.Join(location.Directory, bName+".ecx"))
	}
	if ecxExists {
		for _, shardId := range shardIds {
			shardFileName := dataBaseFilename + erasure_coding.ToExt(int(shardId))
			if util.FileExists(shardFileName) {
				found = true
				os.Remove(shardFileName)
			}
		}
	}

	if !found {
		return nil
	}

	hasEcxFile, hasIdxFile, existingShardCount, err := checkEcVolumeStatus(bName, location)
	if err != nil {
		return err
	}

	if hasEcxFile && existingShardCount == 0 {
		// Remove .ecx/.ecj from both idx and data directories
		// since they may be in either location depending on when -dir.idx was configured
		if err := os.Remove(indexBaseFilename + ".ecx"); err != nil && !os.IsNotExist(err) {
			return err
		}
		os.Remove(indexBaseFilename + ".ecj")
		if location.IdxDirectory != location.Directory {
			os.Remove(dataBaseFilename + ".ecx")
			os.Remove(dataBaseFilename + ".ecj")
		}

		if !hasIdxFile {
			// .vif is used for ec volumes and normal volumes
			os.Remove(dataBaseFilename + ".vif")
		}
	}

	return nil
}

// checkEcVolumeStatus scans a disk location (including a separate idx
// directory, if configured) and reports whether .ecx/.ecj and .idx files
// exist and how many EC shard files are present for the given base name.
func checkEcVolumeStatus(bName string, location *storage.DiskLocation) (hasEcxFile bool, hasIdxFile bool, existingShardCount int, err error) {
	// check whether to delete the .ecx and .ecj file also
	fileInfos, err := os.ReadDir(location.Directory)
	if err != nil {
		return false, false, 0, err
	}
	if location.IdxDirectory != location.Directory {
		idxFileInfos, err := os.ReadDir(location.IdxDirectory)
		if err != nil {
			return false, false, 0, err
		}
		fileInfos = append(fileInfos, idxFileInfos...)
	}
	for _, fileInfo := range fileInfos {
		if fileInfo.Name() == bName+".ecx" || fileInfo.Name() == bName+".ecj" {
			hasEcxFile = true
			continue
		}
		if fileInfo.Name() == bName+".idx" {
			hasIdxFile = true
			continue
		}
		if isEcDataShardFile(fileInfo.Name(), bName) {
			existingShardCount++
		}
	}
	return hasEcxFile, hasIdxFile, existingShardCount, nil
}

// isEcDataShardFile reports whether fileName is an EC shard file for the
// given base name, i.e. baseName + ".ecNN" with NN in [00, MaxShardCount).
func isEcDataShardFile(fileName, baseName string) bool {
	const ecDataShardSuffixLen = 2 // the "NN" in ".ecNN"
	prefix := baseName + ".ec"
	if !strings.HasPrefix(fileName, prefix) {
		return false
	}
	suffix := strings.TrimPrefix(fileName, prefix)
	if len(suffix) != ecDataShardSuffixLen {
		return false
	}
	shardId, err := strconv.Atoi(suffix)
	if err != nil {
		return false
	}
	return shardId >= 0 && shardId < erasure_coding.MaxShardCount
}

// VolumeEcShardsMount mounts the requested EC shards so they can serve reads.
func (vs *VolumeServer) VolumeEcShardsMount(ctx context.Context, req *volume_server_pb.VolumeEcShardsMountRequest) (*volume_server_pb.VolumeEcShardsMountResponse, error) {

	glog.V(0).Infof("VolumeEcShardsMount: %v", req)

	for _, shardId := range req.ShardIds {
		err := vs.store.MountEcShards(req.Collection, needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId))

		if err != nil {
			glog.Errorf("ec shard mount %v: %v", req, err)
		} else {
			glog.V(2).Infof("ec shard mount %v", req)
		}

		if err != nil {
			return nil, fmt.Errorf("mount %d.%d: %v", req.VolumeId, shardId, err)
		}
	}

	return &volume_server_pb.VolumeEcShardsMountResponse{}, nil
}

// VolumeEcShardsUnmount unmounts the requested EC shards.
func (vs *VolumeServer) VolumeEcShardsUnmount(ctx context.Context, req *volume_server_pb.VolumeEcShardsUnmountRequest) (*volume_server_pb.VolumeEcShardsUnmountResponse, error) {

	glog.V(0).Infof("VolumeEcShardsUnmount: %v", req)

	for _, shardId := range req.ShardIds {
		err := vs.store.UnmountEcShards(needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId))

		if err != nil {
			glog.Errorf("ec shard unmount %v: %v", req, err)
		} else {
			glog.V(2).Infof("ec shard unmount %v", req)
		}

		if err != nil {
			return nil, fmt.Errorf("unmount %d.%d: %v", req.VolumeId, shardId, err)
		}
	}

	return &volume_server_pb.VolumeEcShardsUnmountResponse{}, nil
}

// VolumeEcShardRead streams a byte range from one EC shard back to the caller.
func (vs *VolumeServer) VolumeEcShardRead(req *volume_server_pb.VolumeEcShardReadRequest, stream volume_server_pb.VolumeServer_VolumeEcShardReadServer) error {

	ecVolume, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId))
	if !found {
		return fmt.Errorf("VolumeEcShardRead not found ec volume id %d", req.VolumeId)
	}
	ecShard, found := ecVolume.FindEcVolumeShard(erasure_coding.ShardId(req.ShardId))
	if !found {
		return fmt.Errorf("not found ec shard %d.%d", req.VolumeId, req.ShardId)
	}

	if req.FileKey != 0 {
		_, size, _ := ecVolume.FindNeedleFromEcx(types.Uint64ToNeedleId(req.FileKey))
		if size.IsDeleted() {
			return stream.Send(&volume_server_pb.VolumeEcShardReadResponse{
				IsDeleted: true,
			})
		}
	}

	bufSize := req.Size
	if bufSize > BufferSizeLimit {
		bufSize = BufferSizeLimit
	}
	buffer := make([]byte, bufSize)

	startOffset, bytesToRead := req.Offset, req.Size

	for bytesToRead > 0 {
		// min of bytesToRead and bufSize
		bufferSize := bufSize
		if bufferSize > bytesToRead {
			bufferSize = bytesToRead
		}
		bytesread, err := ecShard.ReadAt(buffer[0:bufferSize], startOffset)

		// println("read", ecShard.FileName(), "startOffset", startOffset, bytesread, "bytes, with target", bufferSize)
		if bytesread > 0 {

			if int64(bytesread) > bytesToRead {
				bytesread = int(bytesToRead)
			}
			err = stream.Send(&volume_server_pb.VolumeEcShardReadResponse{
				Data: buffer[:bytesread],
			})
			if err != nil {
				// println("sending", bytesread, "bytes err", err.Error())
				return err
			}

			startOffset += int64(bytesread)
			bytesToRead -= int64(bytesread)

		}

		if err != nil {
			if err != io.EOF {
				return err
			}
			return nil
		}

	}

	return nil

}

// VolumeEcBlobDelete marks one needle as deleted in the local EC volume's index.
func (vs *VolumeServer) VolumeEcBlobDelete(ctx context.Context, req *volume_server_pb.VolumeEcBlobDeleteRequest) (*volume_server_pb.VolumeEcBlobDeleteResponse, error) {
	if err := vs.CheckMaintenanceMode(); err != nil {
		return nil, err
	}

	glog.V(0).Infof("VolumeEcBlobDelete: %v", req)

	resp := &volume_server_pb.VolumeEcBlobDeleteResponse{}

	for _, location := range vs.store.Locations {
		if localEcVolume, found := location.FindEcVolume(needle.VolumeId(req.VolumeId)); found {

			_, size, _, err := localEcVolume.LocateEcShardNeedle(types.NeedleId(req.FileKey), needle.Version(req.Version))
			if err != nil {
				return nil, fmt.Errorf("locate in local ec volume: %w", err)
			}
			if size.IsDeleted() {
				return resp, nil
			}

			err = localEcVolume.DeleteNeedleFromEcx(types.NeedleId(req.FileKey))
			if err != nil {
				return nil, err
			}

			break
		}
	}

	return resp, nil
}

// VolumeEcShardsToVolume generates the .idx, .dat files from .ecx, .ecj and .ec00 ~ .ec13 files
func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_server_pb.VolumeEcShardsToVolumeRequest) (*volume_server_pb.VolumeEcShardsToVolumeResponse, error) {
	if err := vs.CheckMaintenanceMode(); err != nil {
		return nil, err
	}

	glog.V(0).Infof("VolumeEcShardsToVolume: %v", req)

	// Collect all EC shards (NewEcVolume will load EC config from .vif into v.ECContext)
	// Use MaxShardCount (32) to support custom EC ratios up to 32 total shards
	tempShards := make([]string, erasure_coding.MaxShardCount)
	v, found := vs.store.CollectEcShards(needle.VolumeId(req.VolumeId), tempShards)
	if !found {
		return nil, fmt.Errorf("ec volume %d not found", req.VolumeId)
	}

	if v.Collection != req.Collection {
		return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
	}

	// Use EC context (already loaded from .vif) to determine data shard count
	dataShards := v.ECContext.DataShards

	// Defensive validation to prevent panics from corrupted ECContext
	if dataShards <= 0 || dataShards > erasure_coding.MaxShardCount {
		return nil, fmt.Errorf("invalid data shard count %d for volume %d (must be 1..%d)", dataShards, req.VolumeId, erasure_coding.MaxShardCount)
	}

	shardFileNames := tempShards[:dataShards]
	glog.V(1).Infof("Using EC config from volume %d: %d data shards", req.VolumeId, dataShards)

	// Verify all data shards are present
	for shardId := 0; shardId < dataShards; shardId++ {
		if shardFileNames[shardId] == "" {
			return nil, fmt.Errorf("ec volume %d missing shard %d", req.VolumeId, shardId)
		}
	}

	dataBaseFileName, indexBaseFileName := v.DataBaseFileName(), v.IndexBaseFileName()

	// If the EC index contains no live entries, decoding should be a no-op:
	// just allow the caller to purge EC shards and do not generate an empty normal volume.
	hasLive, err := erasure_coding.HasLiveNeedles(indexBaseFileName)
	if err != nil {
		return nil, fmt.Errorf("HasLiveNeedles %s: %w", indexBaseFileName, err)
	}
	if !hasLive {
		return nil, status.Errorf(codes.FailedPrecondition, "ec volume %d %s", req.VolumeId, erasure_coding.EcNoLiveEntriesSubstring)
	}

	// calculate .dat file size
	datFileSize, err := erasure_coding.FindDatFileSize(dataBaseFileName, indexBaseFileName)
	if err != nil {
		return nil, fmt.Errorf("FindDatFileSize %s: %v", dataBaseFileName, err)
	}

	// write .dat file from the data shard files (.ec00 ~ .ec[DataShards-1])
	if err := erasure_coding.WriteDatFile(dataBaseFileName, datFileSize, shardFileNames); err != nil {
		return nil, fmt.Errorf("WriteDatFile %s: %v", dataBaseFileName, err)
	}

	// write .idx file from .ecx and .ecj files
	if err := erasure_coding.WriteIdxFileFromEcIndex(indexBaseFileName); err != nil {
		return nil, fmt.Errorf("WriteIdxFileFromEcIndex %s: %v", v.IndexBaseFileName(), err)
	}

	return &volume_server_pb.VolumeEcShardsToVolumeResponse{}, nil
}

// VolumeEcShardsInfo reports the shard layout and needle statistics of a local EC volume.
func (vs *VolumeServer) VolumeEcShardsInfo(ctx context.Context, req *volume_server_pb.VolumeEcShardsInfoRequest) (*volume_server_pb.VolumeEcShardsInfoResponse, error) {
	glog.V(0).Infof("VolumeEcShardsInfo: %v", req)

	vid := needle.VolumeId(req.GetVolumeId())
	ecv, found := vs.store.FindEcVolume(vid)
	if !found {
		return nil, fmt.Errorf("VolumeEcShardsInfo: EC volume %d not found", vid)
	}

	shardInfos := make([]*volume_server_pb.EcShardInfo, len(ecv.Shards))
	for i, s := range ecv.Shards {
		shardInfos[i] = s.ToEcShardInfo()
	}

	var files, filesDeleted, totalSize uint64
	err := ecv.WalkIndex(func(_ types.NeedleId, _ types.Offset, size types.Size) error {
		// deleted files are counted when computing EC volume sizes. this aligns with VolumeStatus(),
		// which reports the raw data backend file size, regardless of deleted files.
		totalSize += uint64(size.Raw())

		if size.IsDeleted() {
			filesDeleted++
		} else {
			files++
		}

		return nil
	})
	if err != nil {
		return nil, err
	}

	res := &volume_server_pb.VolumeEcShardsInfoResponse{
		EcShardInfos:     shardInfos,
		FileCount:        files,
		FileDeletedCount: filesDeleted,
		VolumeSize:       totalSize,
	}

	return res, nil
}
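
For context, a hypothetical caller-side sketch (not part of this file) of how a client such as the shell's ec.rebuild command might invoke VolumeEcShardsRebuild, following the WithVolumeServerClient pattern used in the handlers above; the function name and parameters are illustrative, and imports (context, grpc, operation, pb, needle, volume_server_pb) are assumed:

func rebuildEcShardsOn(targetServer pb.ServerAddress, grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId) ([]uint32, error) {
	var rebuilt []uint32
	err := operation.WithVolumeServerClient(false, targetServer, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
		// Ask the target volume server to rebuild any missing shards locally.
		resp, rebuildErr := client.VolumeEcShardsRebuild(context.Background(), &volume_server_pb.VolumeEcShardsRebuildRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
		})
		if rebuildErr != nil {
			return rebuildErr
		}
		rebuilt = resp.RebuiltShardIds
		return nil
	})
	return rebuilt, err
}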