Files
seaweedFS/weed/storage/disk_location.go
Chris Lu 4e2af080df optimize: enable immediate EC shard reporting during startup (#7933)
* optimize: enable immediate EC shard reporting during startup

Ported the immediate EC shard reporting feature from Enterprise to Community version.
This allows the master to be notified about EC shards immediately during volume server startup,
instead of waiting for the first heartbeat.

Changes:
1. Updated NewStore to initialize notification channels BEFORE loading volumes (fixes potential nil panic).
2. Added ecShardNotifyHandler to report EC shards to NewEcShardsChan during startup.
3. Implemented non-blocking channel send for EC reporting to prevent deadlock when loading many EC shards (fixing the enterprise bug 17ac1290c).
4. Updated DiskLocation and EC loading logic to support the callback.

This optimization improves cluster state consistency and startup speed for EC-heavy clusters.

* optimize: report actual EC shard size during startup

* optimize: increase notification channel buffer size to 1024

* optimize: fix variable shadowing in store.go
2026-01-01 15:39:54 -08:00

495 lines
13 KiB
Go

package storage
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
)
type DiskLocation struct {
Directory string
DirectoryUuid string
IdxDirectory string
DiskType types.DiskType
MaxVolumeCount int32
OriginalMaxVolumeCount int32
MinFreeSpace util.MinFreeSpace
volumes map[needle.VolumeId]*Volume
volumesLock sync.RWMutex
// erasure coding
ecVolumes map[needle.VolumeId]*erasure_coding.EcVolume
ecVolumesLock sync.RWMutex
ecShardNotifyHandler func(collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, ecVolume *erasure_coding.EcVolume)
isDiskSpaceLow bool
closeCh chan struct{}
}
func GenerateDirUuid(dir string) (dirUuidString string, err error) {
glog.V(1).Infof("Getting uuid of volume directory:%s", dir)
fileName := dir + "/vol_dir.uuid"
if !util.FileExists(fileName) {
dirUuidString, err = writeNewUuid(fileName)
} else {
uuidData, readErr := os.ReadFile(fileName)
if readErr != nil {
return "", fmt.Errorf("failed to read uuid from %s : %v", fileName, readErr)
}
if len(uuidData) > 0 {
dirUuidString = string(uuidData)
} else {
dirUuidString, err = writeNewUuid(fileName)
}
}
return dirUuidString, err
}
func writeNewUuid(fileName string) (string, error) {
dirUuid, _ := uuid.NewRandom()
dirUuidString := dirUuid.String()
if err := util.WriteFile(fileName, []byte(dirUuidString), 0644); err != nil {
return "", fmt.Errorf("failed to write uuid to %s : %v", fileName, err)
}
return dirUuidString, nil
}
func NewDiskLocation(dir string, maxVolumeCount int32, minFreeSpace util.MinFreeSpace, idxDir string, diskType types.DiskType) *DiskLocation {
glog.V(4).Infof("Added new Disk %s: maxVolumes=%d", dir, maxVolumeCount)
dir = util.ResolvePath(dir)
if idxDir == "" {
idxDir = dir
} else {
idxDir = util.ResolvePath(idxDir)
}
dirUuid, err := GenerateDirUuid(dir)
if err != nil {
glog.Fatalf("cannot generate uuid of dir %s: %v", dir, err)
}
location := &DiskLocation{
Directory: dir,
DirectoryUuid: dirUuid,
IdxDirectory: idxDir,
DiskType: diskType,
MaxVolumeCount: maxVolumeCount,
OriginalMaxVolumeCount: maxVolumeCount,
MinFreeSpace: minFreeSpace,
}
location.volumes = make(map[needle.VolumeId]*Volume)
location.ecVolumes = make(map[needle.VolumeId]*erasure_coding.EcVolume)
location.closeCh = make(chan struct{})
go func() {
location.CheckDiskSpace()
for {
select {
case <-location.closeCh:
return
case <-time.After(time.Minute):
location.CheckDiskSpace()
}
}
}()
return location
}
func volumeIdFromFileName(filename string) (needle.VolumeId, string, error) {
if isValidVolume(filename) {
base := filename[:len(filename)-4]
collection, volumeId, err := parseCollectionVolumeId(base)
return volumeId, collection, err
}
return 0, "", fmt.Errorf("file is not a volume: %s", filename)
}
func parseCollectionVolumeId(base string) (collection string, vid needle.VolumeId, err error) {
i := strings.LastIndex(base, "_")
if i > 0 {
collection, base = base[0:i], base[i+1:]
}
vol, err := needle.NewVolumeId(base)
return collection, vol, err
}
func isValidVolume(basename string) bool {
return strings.HasSuffix(basename, ".idx") || strings.HasSuffix(basename, ".vif")
}
func getValidVolumeName(basename string) string {
if isValidVolume(basename) {
return basename[:len(basename)-4]
}
return ""
}
func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind NeedleMapKind, skipIfEcVolumesExists bool, ldbTimeout int64, diskId uint32) bool {
basename := dirEntry.Name()
if dirEntry.IsDir() {
return false
}
volumeName := getValidVolumeName(basename)
if volumeName == "" {
return false
}
// parse out collection, volume id (moved up to use in EC validation)
vid, collection, err := volumeIdFromFileName(basename)
if err != nil {
glog.Warningf("get volume id failed, %s, err : %s", volumeName, err)
return false
}
// skip if ec volumes exists, but validate EC files first
if skipIfEcVolumesExists {
ecxFilePath := filepath.Join(l.IdxDirectory, volumeName+".ecx")
if util.FileExists(ecxFilePath) {
// Validate EC volume: shard count, size consistency, and expected size vs .dat file
if !l.validateEcVolume(collection, vid) {
glog.Warningf("EC volume %d validation failed, removing incomplete EC files to allow .dat file loading", vid)
l.removeEcVolumeFiles(collection, vid)
// Continue to load .dat file
} else {
// Valid EC volume exists, skip .dat file
return false
}
}
}
// check for incomplete volume
noteFile := l.Directory + "/" + volumeName + ".note"
if util.FileExists(noteFile) {
note, _ := os.ReadFile(noteFile)
glog.Warningf("volume %s was not completed: %s", volumeName, string(note))
removeVolumeFiles(l.Directory + "/" + volumeName)
removeVolumeFiles(l.IdxDirectory + "/" + volumeName)
return false
}
// avoid loading one volume more than once
l.volumesLock.RLock()
_, found := l.volumes[vid]
l.volumesLock.RUnlock()
if found {
glog.V(1).Infof("loaded volume, %v", vid)
return true
}
// load the volume
v, e := NewVolume(l.Directory, l.IdxDirectory, collection, vid, needleMapKind, nil, nil, 0, needle.GetCurrentVersion(), 0, ldbTimeout)
if e != nil {
glog.V(0).Infof("new volume %s error %s", volumeName, e)
return false
}
v.diskId = diskId // Set the disk ID for existing volumes
l.SetVolume(vid, v)
size, _, _ := v.FileStat()
glog.V(2).Infof("data file %s, replication=%s v=%d size=%d ttl=%s disk_id=%d",
l.Directory+"/"+volumeName+".dat", v.ReplicaPlacement, v.Version(), size, v.Ttl.String(), diskId)
return true
}
func (l *DiskLocation) concurrentLoadingVolumes(needleMapKind NeedleMapKind, concurrency int, ldbTimeout int64, diskId uint32) {
task_queue := make(chan os.DirEntry, 10*concurrency)
go func() {
foundVolumeNames := make(map[string]bool)
if dirEntries, err := os.ReadDir(l.Directory); err == nil {
for _, entry := range dirEntries {
volumeName := getValidVolumeName(entry.Name())
if volumeName == "" {
continue
}
if _, found := foundVolumeNames[volumeName]; !found {
foundVolumeNames[volumeName] = true
task_queue <- entry
}
}
}
close(task_queue)
}()
var wg sync.WaitGroup
for workerNum := 0; workerNum < concurrency; workerNum++ {
wg.Add(1)
go func() {
defer wg.Done()
for fi := range task_queue {
_ = l.loadExistingVolume(fi, needleMapKind, true, ldbTimeout, diskId)
}
}()
}
wg.Wait()
}
func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapKind, ldbTimeout int64) {
l.loadExistingVolumesWithId(needleMapKind, ldbTimeout, 0) // Default disk ID for backward compatibility
}
func (l *DiskLocation) loadExistingVolumesWithId(needleMapKind NeedleMapKind, ldbTimeout int64, diskId uint32) {
workerNum := runtime.NumCPU()
val, ok := os.LookupEnv("GOMAXPROCS")
if ok {
num, err := strconv.Atoi(val)
if err != nil || num < 1 {
num = 10
glog.Warningf("failed to set worker number from GOMAXPROCS , set to default:10")
}
workerNum = num
} else {
if workerNum <= 10 {
workerNum = 10
}
}
l.concurrentLoadingVolumes(needleMapKind, workerNum, ldbTimeout, diskId)
glog.V(2).Infof("Store started on dir: %s with %d volumes max %d (disk ID: %d)", l.Directory, len(l.volumes), l.MaxVolumeCount, diskId)
l.loadAllEcShardsWithCallback(l.ecShardNotifyHandler)
glog.V(2).Infof("Store started on dir: %s with %d ec shards (disk ID: %d)", l.Directory, len(l.ecVolumes), diskId)
}
func (l *DiskLocation) DeleteCollectionFromDiskLocation(collection string) (e error) {
l.volumesLock.Lock()
delVolsMap := l.unmountVolumeByCollection(collection)
l.volumesLock.Unlock()
l.ecVolumesLock.Lock()
delEcVolsMap := l.unmountEcVolumeByCollection(collection)
l.ecVolumesLock.Unlock()
errChain := make(chan error, 2)
var wg sync.WaitGroup
wg.Add(2)
go func() {
for _, v := range delVolsMap {
if err := v.Destroy(false); err != nil {
errChain <- err
}
}
wg.Done()
}()
go func() {
for _, v := range delEcVolsMap {
v.Destroy()
}
wg.Done()
}()
go func() {
wg.Wait()
close(errChain)
}()
errBuilder := strings.Builder{}
for err := range errChain {
errBuilder.WriteString(err.Error())
errBuilder.WriteString("; ")
}
if errBuilder.Len() > 0 {
e = fmt.Errorf("%s", errBuilder.String())
}
return
}
func (l *DiskLocation) deleteVolumeById(vid needle.VolumeId, onlyEmpty bool) (found bool, e error) {
v, ok := l.volumes[vid]
if !ok {
return
}
e = v.Destroy(onlyEmpty)
if e != nil {
return
}
found = true
delete(l.volumes, vid)
return
}
func (l *DiskLocation) LoadVolume(diskId uint32, vid needle.VolumeId, needleMapKind NeedleMapKind) bool {
if fileInfo, found := l.LocateVolume(vid); found {
return l.loadExistingVolume(fileInfo, needleMapKind, false, 0, diskId)
}
return false
}
var ErrVolumeNotFound = fmt.Errorf("volume not found")
func (l *DiskLocation) DeleteVolume(vid needle.VolumeId, onlyEmpty bool) error {
l.volumesLock.Lock()
defer l.volumesLock.Unlock()
_, ok := l.volumes[vid]
if !ok {
return ErrVolumeNotFound
}
_, err := l.deleteVolumeById(vid, onlyEmpty)
return err
}
func (l *DiskLocation) UnloadVolume(vid needle.VolumeId) error {
l.volumesLock.Lock()
defer l.volumesLock.Unlock()
v, ok := l.volumes[vid]
if !ok {
return ErrVolumeNotFound
}
v.Close()
delete(l.volumes, vid)
return nil
}
func (l *DiskLocation) unmountVolumeByCollection(collectionName string) map[needle.VolumeId]*Volume {
deltaVols := make(map[needle.VolumeId]*Volume, 0)
for k, v := range l.volumes {
if v.Collection == collectionName && !v.isCompacting && !v.isCommitCompacting {
deltaVols[k] = v
}
}
for k := range deltaVols {
delete(l.volumes, k)
}
return deltaVols
}
func (l *DiskLocation) SetVolume(vid needle.VolumeId, volume *Volume) {
l.volumesLock.Lock()
defer l.volumesLock.Unlock()
l.volumes[vid] = volume
volume.location = l
}
func (l *DiskLocation) FindVolume(vid needle.VolumeId) (*Volume, bool) {
l.volumesLock.RLock()
defer l.volumesLock.RUnlock()
v, ok := l.volumes[vid]
return v, ok
}
func (l *DiskLocation) VolumesLen() int {
l.volumesLock.RLock()
defer l.volumesLock.RUnlock()
return len(l.volumes)
}
func (l *DiskLocation) LocalVolumesLen() int {
l.volumesLock.RLock()
defer l.volumesLock.RUnlock()
count := 0
for _, v := range l.volumes {
if !v.HasRemoteFile() {
count++
}
}
return count
}
func (l *DiskLocation) SetStopping() {
l.volumesLock.Lock()
for _, v := range l.volumes {
v.SyncToDisk()
}
l.volumesLock.Unlock()
return
}
func (l *DiskLocation) Close() {
l.volumesLock.Lock()
for _, v := range l.volumes {
v.Close()
}
l.volumesLock.Unlock()
l.ecVolumesLock.Lock()
for _, ecVolume := range l.ecVolumes {
ecVolume.Close()
}
l.ecVolumesLock.Unlock()
close(l.closeCh)
return
}
func (l *DiskLocation) LocateVolume(vid needle.VolumeId) (os.DirEntry, bool) {
// println("LocateVolume", vid, "on", l.Directory)
if dirEntries, err := os.ReadDir(l.Directory); err == nil {
for _, entry := range dirEntries {
// println("checking", entry.Name(), "...")
volId, _, err := volumeIdFromFileName(entry.Name())
// println("volId", volId, "err", err)
if vid == volId && err == nil {
return entry, true
}
}
}
return nil, false
}
func (l *DiskLocation) UnUsedSpace(volumeSizeLimit uint64) (unUsedSpace uint64) {
l.volumesLock.RLock()
defer l.volumesLock.RUnlock()
for _, vol := range l.volumes {
if vol.IsReadOnly() {
continue
}
datSize, idxSize, _ := vol.FileStat()
unUsedSpaceVolume := int64(volumeSizeLimit) - int64(datSize+idxSize)
glog.V(4).Infof("Volume stats for %d: volumeSizeLimit=%d, datSize=%d idxSize=%d unused=%d", vol.Id, volumeSizeLimit, datSize, idxSize, unUsedSpaceVolume)
if unUsedSpaceVolume >= 0 {
unUsedSpace += uint64(unUsedSpaceVolume)
}
}
return
}
func (l *DiskLocation) CheckDiskSpace() {
if dir, e := filepath.Abs(l.Directory); e == nil {
s := stats.NewDiskStatus(dir)
stats.VolumeServerResourceGauge.WithLabelValues(l.Directory, "all").Set(float64(s.All))
stats.VolumeServerResourceGauge.WithLabelValues(l.Directory, "used").Set(float64(s.Used))
stats.VolumeServerResourceGauge.WithLabelValues(l.Directory, "free").Set(float64(s.Free))
isLow, desc := l.MinFreeSpace.IsLow(s.Free, s.PercentFree)
if isLow != l.isDiskSpaceLow {
l.isDiskSpaceLow = !l.isDiskSpaceLow
}
logLevel := glog.Level(4)
if l.isDiskSpaceLow {
logLevel = glog.Level(0)
}
glog.V(logLevel).Infof("dir %s %s", dir, desc)
}
}