* optimize: enable immediate EC shard reporting during startup
Ported the immediate EC shard reporting feature from the Enterprise version to the Community version.
This allows the master to be notified about EC shards immediately during volume server startup,
instead of waiting for the first heartbeat.
Changes:
1. Updated NewStore to initialize notification channels BEFORE loading volumes (fixes potential nil panic).
2. Added ecShardNotifyHandler to report EC shards to NewEcShardsChan during startup.
3. Implemented non-blocking channel send for EC reporting to prevent deadlock when loading many EC shards (fixing the enterprise bug 17ac1290c).
4. Updated DiskLocation and EC loading logic to support the callback.
This optimization improves cluster state consistency and startup speed for EC-heavy clusters.
* optimize: report actual EC shard size during startup
* optimize: increase notification channel buffer size to 1024
* optimize: fix variable shadowing in store.go
495 lines
13 KiB
Go
495 lines
13 KiB
Go
package storage
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
// DiskLocation tracks one storage directory on a disk: the regular volumes
// and erasure-coded volumes that live there, its capacity limits, and the
// background disk-space status.
type DiskLocation struct {
	Directory              string // resolved data directory path
	DirectoryUuid          string // persistent uuid read from/written to <Directory>/vol_dir.uuid
	IdxDirectory           string // directory for index files; defaults to Directory when not configured
	DiskType               types.DiskType
	MaxVolumeCount         int32
	OriginalMaxVolumeCount int32 // MaxVolumeCount as configured at construction time
	MinFreeSpace           util.MinFreeSpace
	volumes                map[needle.VolumeId]*Volume
	volumesLock            sync.RWMutex // guards volumes

	// erasure coding
	ecVolumes     map[needle.VolumeId]*erasure_coding.EcVolume
	ecVolumesLock sync.RWMutex // guards ecVolumes

	// ecShardNotifyHandler, when set, is passed to the EC shard loading path
	// (see loadExistingVolumesWithId) so each shard can be reported during
	// startup instead of waiting for the first heartbeat.
	ecShardNotifyHandler func(collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, ecVolume *erasure_coding.EcVolume)

	isDiskSpaceLow bool          // last result computed by CheckDiskSpace
	closeCh        chan struct{} // closed by Close to stop the disk-space checker goroutine
}
|
|
|
|
func GenerateDirUuid(dir string) (dirUuidString string, err error) {
|
|
glog.V(1).Infof("Getting uuid of volume directory:%s", dir)
|
|
fileName := dir + "/vol_dir.uuid"
|
|
if !util.FileExists(fileName) {
|
|
dirUuidString, err = writeNewUuid(fileName)
|
|
} else {
|
|
uuidData, readErr := os.ReadFile(fileName)
|
|
if readErr != nil {
|
|
return "", fmt.Errorf("failed to read uuid from %s : %v", fileName, readErr)
|
|
}
|
|
if len(uuidData) > 0 {
|
|
dirUuidString = string(uuidData)
|
|
} else {
|
|
dirUuidString, err = writeNewUuid(fileName)
|
|
}
|
|
}
|
|
return dirUuidString, err
|
|
}
|
|
|
|
func writeNewUuid(fileName string) (string, error) {
|
|
dirUuid, _ := uuid.NewRandom()
|
|
dirUuidString := dirUuid.String()
|
|
if err := util.WriteFile(fileName, []byte(dirUuidString), 0644); err != nil {
|
|
return "", fmt.Errorf("failed to write uuid to %s : %v", fileName, err)
|
|
}
|
|
return dirUuidString, nil
|
|
}
|
|
|
|
func NewDiskLocation(dir string, maxVolumeCount int32, minFreeSpace util.MinFreeSpace, idxDir string, diskType types.DiskType) *DiskLocation {
|
|
glog.V(4).Infof("Added new Disk %s: maxVolumes=%d", dir, maxVolumeCount)
|
|
dir = util.ResolvePath(dir)
|
|
if idxDir == "" {
|
|
idxDir = dir
|
|
} else {
|
|
idxDir = util.ResolvePath(idxDir)
|
|
}
|
|
dirUuid, err := GenerateDirUuid(dir)
|
|
if err != nil {
|
|
glog.Fatalf("cannot generate uuid of dir %s: %v", dir, err)
|
|
}
|
|
location := &DiskLocation{
|
|
Directory: dir,
|
|
DirectoryUuid: dirUuid,
|
|
IdxDirectory: idxDir,
|
|
DiskType: diskType,
|
|
MaxVolumeCount: maxVolumeCount,
|
|
OriginalMaxVolumeCount: maxVolumeCount,
|
|
MinFreeSpace: minFreeSpace,
|
|
}
|
|
location.volumes = make(map[needle.VolumeId]*Volume)
|
|
location.ecVolumes = make(map[needle.VolumeId]*erasure_coding.EcVolume)
|
|
location.closeCh = make(chan struct{})
|
|
go func() {
|
|
location.CheckDiskSpace()
|
|
for {
|
|
select {
|
|
case <-location.closeCh:
|
|
return
|
|
case <-time.After(time.Minute):
|
|
location.CheckDiskSpace()
|
|
}
|
|
}
|
|
}()
|
|
return location
|
|
}
|
|
|
|
func volumeIdFromFileName(filename string) (needle.VolumeId, string, error) {
|
|
if isValidVolume(filename) {
|
|
base := filename[:len(filename)-4]
|
|
collection, volumeId, err := parseCollectionVolumeId(base)
|
|
return volumeId, collection, err
|
|
}
|
|
|
|
return 0, "", fmt.Errorf("file is not a volume: %s", filename)
|
|
}
|
|
|
|
func parseCollectionVolumeId(base string) (collection string, vid needle.VolumeId, err error) {
|
|
i := strings.LastIndex(base, "_")
|
|
if i > 0 {
|
|
collection, base = base[0:i], base[i+1:]
|
|
}
|
|
vol, err := needle.NewVolumeId(base)
|
|
return collection, vol, err
|
|
}
|
|
|
|
// isValidVolume reports whether basename names a volume index (.idx) or
// volume info (.vif) file.
func isValidVolume(basename string) bool {
	switch {
	case strings.HasSuffix(basename, ".idx"), strings.HasSuffix(basename, ".vif"):
		return true
	default:
		return false
	}
}
|
|
|
|
// getValidVolumeName strips the recognized volume file extension (.idx or
// .vif) from basename; it returns "" when basename is not a volume file.
func getValidVolumeName(basename string) string {
	for _, ext := range []string{".idx", ".vif"} {
		if strings.HasSuffix(basename, ext) {
			return strings.TrimSuffix(basename, ext)
		}
	}
	return ""
}
|
|
|
|
// loadExistingVolume loads the volume identified by dirEntry (an .idx/.vif
// file) into l.volumes. It returns true when the volume is loaded (or was
// already loaded), false when the entry is not a loadable volume.
// When skipIfEcVolumesExists is set, a valid erasure-coded copy of the volume
// makes this skip the .dat file; an invalid EC copy is deleted so the .dat
// file can load instead.
func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind NeedleMapKind, skipIfEcVolumesExists bool, ldbTimeout int64, diskId uint32) bool {
	basename := dirEntry.Name()
	if dirEntry.IsDir() {
		return false
	}
	volumeName := getValidVolumeName(basename)
	if volumeName == "" {
		// not an .idx/.vif file
		return false
	}

	// parse out collection, volume id (moved up to use in EC validation)
	vid, collection, err := volumeIdFromFileName(basename)
	if err != nil {
		glog.Warningf("get volume id failed, %s, err : %s", volumeName, err)
		return false
	}

	// skip if ec volumes exists, but validate EC files first
	if skipIfEcVolumesExists {
		ecxFilePath := filepath.Join(l.IdxDirectory, volumeName+".ecx")
		if util.FileExists(ecxFilePath) {
			// Validate EC volume: shard count, size consistency, and expected size vs .dat file
			if !l.validateEcVolume(collection, vid) {
				glog.Warningf("EC volume %d validation failed, removing incomplete EC files to allow .dat file loading", vid)
				l.removeEcVolumeFiles(collection, vid)
				// Continue to load .dat file
			} else {
				// Valid EC volume exists, skip .dat file
				return false
			}
		}
	}

	// check for incomplete volume: a leftover .note file marks an interrupted
	// operation, so the partial volume files are removed rather than loaded
	noteFile := l.Directory + "/" + volumeName + ".note"
	if util.FileExists(noteFile) {
		note, _ := os.ReadFile(noteFile)
		glog.Warningf("volume %s was not completed: %s", volumeName, string(note))
		removeVolumeFiles(l.Directory + "/" + volumeName)
		removeVolumeFiles(l.IdxDirectory + "/" + volumeName)
		return false
	}

	// avoid loading one volume more than once
	l.volumesLock.RLock()
	_, found := l.volumes[vid]
	l.volumesLock.RUnlock()
	if found {
		glog.V(1).Infof("loaded volume, %v", vid)
		return true
	}

	// load the volume
	v, e := NewVolume(l.Directory, l.IdxDirectory, collection, vid, needleMapKind, nil, nil, 0, needle.GetCurrentVersion(), 0, ldbTimeout)
	if e != nil {
		glog.V(0).Infof("new volume %s error %s", volumeName, e)
		return false
	}

	v.diskId = diskId // Set the disk ID for existing volumes
	l.SetVolume(vid, v)

	size, _, _ := v.FileStat()
	glog.V(2).Infof("data file %s, replication=%s v=%d size=%d ttl=%s disk_id=%d",
		l.Directory+"/"+volumeName+".dat", v.ReplicaPlacement, v.Version(), size, v.Ttl.String(), diskId)
	return true
}
|
|
|
|
func (l *DiskLocation) concurrentLoadingVolumes(needleMapKind NeedleMapKind, concurrency int, ldbTimeout int64, diskId uint32) {
|
|
|
|
task_queue := make(chan os.DirEntry, 10*concurrency)
|
|
go func() {
|
|
foundVolumeNames := make(map[string]bool)
|
|
if dirEntries, err := os.ReadDir(l.Directory); err == nil {
|
|
for _, entry := range dirEntries {
|
|
volumeName := getValidVolumeName(entry.Name())
|
|
if volumeName == "" {
|
|
continue
|
|
}
|
|
if _, found := foundVolumeNames[volumeName]; !found {
|
|
foundVolumeNames[volumeName] = true
|
|
task_queue <- entry
|
|
}
|
|
}
|
|
}
|
|
close(task_queue)
|
|
}()
|
|
|
|
var wg sync.WaitGroup
|
|
for workerNum := 0; workerNum < concurrency; workerNum++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
for fi := range task_queue {
|
|
_ = l.loadExistingVolume(fi, needleMapKind, true, ldbTimeout, diskId)
|
|
}
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapKind, ldbTimeout int64) {
|
|
l.loadExistingVolumesWithId(needleMapKind, ldbTimeout, 0) // Default disk ID for backward compatibility
|
|
}
|
|
|
|
func (l *DiskLocation) loadExistingVolumesWithId(needleMapKind NeedleMapKind, ldbTimeout int64, diskId uint32) {
|
|
|
|
workerNum := runtime.NumCPU()
|
|
val, ok := os.LookupEnv("GOMAXPROCS")
|
|
if ok {
|
|
num, err := strconv.Atoi(val)
|
|
if err != nil || num < 1 {
|
|
num = 10
|
|
glog.Warningf("failed to set worker number from GOMAXPROCS , set to default:10")
|
|
}
|
|
workerNum = num
|
|
} else {
|
|
if workerNum <= 10 {
|
|
workerNum = 10
|
|
}
|
|
}
|
|
l.concurrentLoadingVolumes(needleMapKind, workerNum, ldbTimeout, diskId)
|
|
glog.V(2).Infof("Store started on dir: %s with %d volumes max %d (disk ID: %d)", l.Directory, len(l.volumes), l.MaxVolumeCount, diskId)
|
|
|
|
l.loadAllEcShardsWithCallback(l.ecShardNotifyHandler)
|
|
glog.V(2).Infof("Store started on dir: %s with %d ec shards (disk ID: %d)", l.Directory, len(l.ecVolumes), diskId)
|
|
|
|
}
|
|
|
|
// DeleteCollectionFromDiskLocation unmounts and destroys every regular volume
// and EC volume belonging to collection on this disk location. Regular and EC
// destruction run in parallel goroutines; any destroy errors are concatenated
// into the single returned error (nil when all destroys succeed).
func (l *DiskLocation) DeleteCollectionFromDiskLocation(collection string) (e error) {

	// First detach the matching volumes from the maps under their locks,
	// then destroy the detached volumes outside the locks.
	l.volumesLock.Lock()
	delVolsMap := l.unmountVolumeByCollection(collection)
	l.volumesLock.Unlock()

	l.ecVolumesLock.Lock()
	delEcVolsMap := l.unmountEcVolumeByCollection(collection)
	l.ecVolumesLock.Unlock()

	// The main goroutine drains errChain concurrently below, so senders
	// never block for long even with more errors than the buffer holds.
	errChain := make(chan error, 2)
	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		for _, v := range delVolsMap {
			if err := v.Destroy(false); err != nil {
				errChain <- err
			}
		}
		wg.Done()
	}()

	go func() {
		for _, v := range delEcVolsMap {
			// EC volume Destroy reports no error here
			v.Destroy()
		}
		wg.Done()
	}()

	// Close errChain once both destroyers finish, ending the range loop below.
	go func() {
		wg.Wait()
		close(errChain)
	}()

	errBuilder := strings.Builder{}
	for err := range errChain {
		errBuilder.WriteString(err.Error())
		errBuilder.WriteString("; ")
	}
	if errBuilder.Len() > 0 {
		e = fmt.Errorf("%s", errBuilder.String())
	}

	return
}
|
|
|
|
// deleteVolumeById destroys the files of volume vid and removes it from
// l.volumes. found is true only when the volume existed and was destroyed.
// NOTE: this accesses l.volumes without taking volumesLock — callers (e.g.
// DeleteVolume) are expected to hold the write lock.
func (l *DiskLocation) deleteVolumeById(vid needle.VolumeId, onlyEmpty bool) (found bool, e error) {
	v, ok := l.volumes[vid]
	if !ok {
		// not present: found stays false, e stays nil
		return
	}
	e = v.Destroy(onlyEmpty)
	if e != nil {
		// destroy failed: keep the volume in the map
		return
	}
	found = true
	delete(l.volumes, vid)
	return
}
|
|
|
|
func (l *DiskLocation) LoadVolume(diskId uint32, vid needle.VolumeId, needleMapKind NeedleMapKind) bool {
|
|
if fileInfo, found := l.LocateVolume(vid); found {
|
|
return l.loadExistingVolume(fileInfo, needleMapKind, false, 0, diskId)
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ErrVolumeNotFound is returned when the requested volume id is not present
// on this disk location.
var ErrVolumeNotFound = fmt.Errorf("volume not found")
|
|
|
|
func (l *DiskLocation) DeleteVolume(vid needle.VolumeId, onlyEmpty bool) error {
|
|
l.volumesLock.Lock()
|
|
defer l.volumesLock.Unlock()
|
|
|
|
_, ok := l.volumes[vid]
|
|
if !ok {
|
|
return ErrVolumeNotFound
|
|
}
|
|
_, err := l.deleteVolumeById(vid, onlyEmpty)
|
|
return err
|
|
}
|
|
|
|
func (l *DiskLocation) UnloadVolume(vid needle.VolumeId) error {
|
|
l.volumesLock.Lock()
|
|
defer l.volumesLock.Unlock()
|
|
|
|
v, ok := l.volumes[vid]
|
|
if !ok {
|
|
return ErrVolumeNotFound
|
|
}
|
|
v.Close()
|
|
delete(l.volumes, vid)
|
|
return nil
|
|
}
|
|
|
|
func (l *DiskLocation) unmountVolumeByCollection(collectionName string) map[needle.VolumeId]*Volume {
|
|
deltaVols := make(map[needle.VolumeId]*Volume, 0)
|
|
for k, v := range l.volumes {
|
|
if v.Collection == collectionName && !v.isCompacting && !v.isCommitCompacting {
|
|
deltaVols[k] = v
|
|
}
|
|
}
|
|
|
|
for k := range deltaVols {
|
|
delete(l.volumes, k)
|
|
}
|
|
return deltaVols
|
|
}
|
|
|
|
func (l *DiskLocation) SetVolume(vid needle.VolumeId, volume *Volume) {
|
|
l.volumesLock.Lock()
|
|
defer l.volumesLock.Unlock()
|
|
|
|
l.volumes[vid] = volume
|
|
volume.location = l
|
|
}
|
|
|
|
func (l *DiskLocation) FindVolume(vid needle.VolumeId) (*Volume, bool) {
|
|
l.volumesLock.RLock()
|
|
defer l.volumesLock.RUnlock()
|
|
|
|
v, ok := l.volumes[vid]
|
|
return v, ok
|
|
}
|
|
|
|
func (l *DiskLocation) VolumesLen() int {
|
|
l.volumesLock.RLock()
|
|
defer l.volumesLock.RUnlock()
|
|
|
|
return len(l.volumes)
|
|
}
|
|
|
|
func (l *DiskLocation) LocalVolumesLen() int {
|
|
l.volumesLock.RLock()
|
|
defer l.volumesLock.RUnlock()
|
|
|
|
count := 0
|
|
for _, v := range l.volumes {
|
|
if !v.HasRemoteFile() {
|
|
count++
|
|
}
|
|
}
|
|
return count
|
|
}
|
|
|
|
func (l *DiskLocation) SetStopping() {
|
|
l.volumesLock.Lock()
|
|
for _, v := range l.volumes {
|
|
v.SyncToDisk()
|
|
}
|
|
l.volumesLock.Unlock()
|
|
|
|
return
|
|
}
|
|
|
|
func (l *DiskLocation) Close() {
|
|
l.volumesLock.Lock()
|
|
for _, v := range l.volumes {
|
|
v.Close()
|
|
}
|
|
l.volumesLock.Unlock()
|
|
|
|
l.ecVolumesLock.Lock()
|
|
for _, ecVolume := range l.ecVolumes {
|
|
ecVolume.Close()
|
|
}
|
|
l.ecVolumesLock.Unlock()
|
|
|
|
close(l.closeCh)
|
|
return
|
|
}
|
|
|
|
func (l *DiskLocation) LocateVolume(vid needle.VolumeId) (os.DirEntry, bool) {
|
|
// println("LocateVolume", vid, "on", l.Directory)
|
|
if dirEntries, err := os.ReadDir(l.Directory); err == nil {
|
|
for _, entry := range dirEntries {
|
|
// println("checking", entry.Name(), "...")
|
|
volId, _, err := volumeIdFromFileName(entry.Name())
|
|
// println("volId", volId, "err", err)
|
|
if vid == volId && err == nil {
|
|
return entry, true
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil, false
|
|
}
|
|
|
|
func (l *DiskLocation) UnUsedSpace(volumeSizeLimit uint64) (unUsedSpace uint64) {
|
|
l.volumesLock.RLock()
|
|
defer l.volumesLock.RUnlock()
|
|
|
|
for _, vol := range l.volumes {
|
|
if vol.IsReadOnly() {
|
|
continue
|
|
}
|
|
datSize, idxSize, _ := vol.FileStat()
|
|
unUsedSpaceVolume := int64(volumeSizeLimit) - int64(datSize+idxSize)
|
|
glog.V(4).Infof("Volume stats for %d: volumeSizeLimit=%d, datSize=%d idxSize=%d unused=%d", vol.Id, volumeSizeLimit, datSize, idxSize, unUsedSpaceVolume)
|
|
if unUsedSpaceVolume >= 0 {
|
|
unUsedSpace += uint64(unUsedSpaceVolume)
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (l *DiskLocation) CheckDiskSpace() {
|
|
if dir, e := filepath.Abs(l.Directory); e == nil {
|
|
s := stats.NewDiskStatus(dir)
|
|
stats.VolumeServerResourceGauge.WithLabelValues(l.Directory, "all").Set(float64(s.All))
|
|
stats.VolumeServerResourceGauge.WithLabelValues(l.Directory, "used").Set(float64(s.Used))
|
|
stats.VolumeServerResourceGauge.WithLabelValues(l.Directory, "free").Set(float64(s.Free))
|
|
|
|
isLow, desc := l.MinFreeSpace.IsLow(s.Free, s.PercentFree)
|
|
if isLow != l.isDiskSpaceLow {
|
|
l.isDiskSpaceLow = !l.isDiskSpaceLow
|
|
}
|
|
|
|
logLevel := glog.Level(4)
|
|
if l.isDiskSpaceLow {
|
|
logLevel = glog.Level(0)
|
|
}
|
|
|
|
glog.V(logLevel).Infof("dir %s %s", dir, desc)
|
|
}
|
|
}
|