ec: fall back to data dir when ecx file not found in idx dir (#8541)
* ec: fall back to data dir when ecx file not found in idx dir (#8540)

  When -dir.idx is configured after EC encoding, the .ecx/.ecj files remain in the data directory. NewEcVolume now falls back to the data directory when the index file is not found in dirIdx.

* ec: add fallback logging and improved error message for ecx lookup

* ec: preserve configured dirIdx, track actual ecx location separately

  The previous fallback set ev.dirIdx = dir when finding .ecx in the data directory, which corrupted IndexBaseFileName() for future writes (e.g., WriteIdxFileFromEcIndex during EC-to-volume conversion would write the .idx file to the data directory instead of the configured index directory).

  Introduce ecxActualDir to track where .ecx/.ecj were actually found, used only by FileName() for cleanup/destroy. IndexBaseFileName() continues to use the configured dirIdx for new file creation.

* ec: check both idx and data dirs for .ecx in all cleanup and lookup paths

  When -dir.idx is configured after EC encoding, .ecx/.ecj files may reside in the data directory. Several code paths only checked l.IdxDirectory, causing them to miss these files:

  - removeEcVolumeFiles: now removes .ecx/.ecj from both directories
  - loadExistingVolume: ecx existence check falls back to data dir
  - deleteEcShardIdsForEachLocation: ecx existence check and cleanup both cover the data directory
  - VolumeEcShardsRebuild: ecx lookup falls back to data directory so RebuildEcxFile operates on the correct file
This commit is contained in:
@@ -154,7 +154,12 @@ func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_s
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if util.FileExists(path.Join(location.IdxDirectory, baseFileName+".ecx")) {
|
indexBaseFileName := path.Join(location.IdxDirectory, baseFileName)
|
||||||
|
if !util.FileExists(indexBaseFileName+".ecx") && location.IdxDirectory != location.Directory {
|
||||||
|
// .ecx may be in the data directory if created before -dir.idx was configured
|
||||||
|
indexBaseFileName = path.Join(location.Directory, baseFileName)
|
||||||
|
}
|
||||||
|
if util.FileExists(indexBaseFileName + ".ecx") {
|
||||||
// write .ec00 ~ .ec13 files
|
// write .ec00 ~ .ec13 files
|
||||||
dataBaseFileName := path.Join(location.Directory, baseFileName)
|
dataBaseFileName := path.Join(location.Directory, baseFileName)
|
||||||
if generatedShardIds, err := erasure_coding.RebuildEcFiles(dataBaseFileName); err != nil {
|
if generatedShardIds, err := erasure_coding.RebuildEcFiles(dataBaseFileName); err != nil {
|
||||||
@@ -163,9 +168,8 @@ func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_s
|
|||||||
rebuiltShardIds = generatedShardIds
|
rebuiltShardIds = generatedShardIds
|
||||||
}
|
}
|
||||||
|
|
||||||
indexBaseFileName := path.Join(location.IdxDirectory, baseFileName)
|
|
||||||
if err := erasure_coding.RebuildEcxFile(indexBaseFileName); err != nil {
|
if err := erasure_coding.RebuildEcxFile(indexBaseFileName); err != nil {
|
||||||
return nil, fmt.Errorf("RebuildEcxFile %s: %v", dataBaseFileName, err)
|
return nil, fmt.Errorf("RebuildEcxFile %s: %v", indexBaseFileName, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
break
|
break
|
||||||
@@ -283,7 +287,11 @@ func deleteEcShardIdsForEachLocation(bName string, location *storage.DiskLocatio
|
|||||||
indexBaseFilename := path.Join(location.IdxDirectory, bName)
|
indexBaseFilename := path.Join(location.IdxDirectory, bName)
|
||||||
dataBaseFilename := path.Join(location.Directory, bName)
|
dataBaseFilename := path.Join(location.Directory, bName)
|
||||||
|
|
||||||
if util.FileExists(path.Join(location.IdxDirectory, bName+".ecx")) {
|
ecxExists := util.FileExists(path.Join(location.IdxDirectory, bName+".ecx"))
|
||||||
|
if !ecxExists && location.IdxDirectory != location.Directory {
|
||||||
|
ecxExists = util.FileExists(path.Join(location.Directory, bName+".ecx"))
|
||||||
|
}
|
||||||
|
if ecxExists {
|
||||||
for _, shardId := range shardIds {
|
for _, shardId := range shardIds {
|
||||||
shardFileName := dataBaseFilename + erasure_coding.ToExt(int(shardId))
|
shardFileName := dataBaseFilename + erasure_coding.ToExt(int(shardId))
|
||||||
if util.FileExists(shardFileName) {
|
if util.FileExists(shardFileName) {
|
||||||
@@ -303,10 +311,16 @@ func deleteEcShardIdsForEachLocation(bName string, location *storage.DiskLocatio
|
|||||||
}
|
}
|
||||||
|
|
||||||
if hasEcxFile && existingShardCount == 0 {
|
if hasEcxFile && existingShardCount == 0 {
|
||||||
if err := os.Remove(indexBaseFilename + ".ecx"); err != nil {
|
// Remove .ecx/.ecj from both idx and data directories
|
||||||
|
// since they may be in either location depending on when -dir.idx was configured
|
||||||
|
if err := os.Remove(indexBaseFilename + ".ecx"); err != nil && !os.IsNotExist(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
os.Remove(indexBaseFilename + ".ecj")
|
os.Remove(indexBaseFilename + ".ecj")
|
||||||
|
if location.IdxDirectory != location.Directory {
|
||||||
|
os.Remove(dataBaseFilename + ".ecx")
|
||||||
|
os.Remove(dataBaseFilename + ".ecj")
|
||||||
|
}
|
||||||
|
|
||||||
if !hasIdxFile {
|
if !hasIdxFile {
|
||||||
// .vif is used for ec volumes and normal volumes
|
// .vif is used for ec volumes and normal volumes
|
||||||
|
|||||||
@@ -172,6 +172,10 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
|
|||||||
// skip if ec volumes exists, but validate EC files first
|
// skip if ec volumes exists, but validate EC files first
|
||||||
if skipIfEcVolumesExists {
|
if skipIfEcVolumesExists {
|
||||||
ecxFilePath := filepath.Join(l.IdxDirectory, volumeName+".ecx")
|
ecxFilePath := filepath.Join(l.IdxDirectory, volumeName+".ecx")
|
||||||
|
if !util.FileExists(ecxFilePath) && l.IdxDirectory != l.Directory {
|
||||||
|
// .ecx may have been created before -dir.idx was configured
|
||||||
|
ecxFilePath = filepath.Join(l.Directory, volumeName+".ecx")
|
||||||
|
}
|
||||||
if util.FileExists(ecxFilePath) {
|
if util.FileExists(ecxFilePath) {
|
||||||
// Validate EC volume: shard count, size consistency, and expected size vs .dat file
|
// Validate EC volume: shard count, size consistency, and expected size vs .dat file
|
||||||
if !l.validateEcVolume(collection, vid) {
|
if !l.validateEcVolume(collection, vid) {
|
||||||
|
|||||||
@@ -476,6 +476,11 @@ func (l *DiskLocation) removeEcVolumeFiles(collection string, vid needle.VolumeI
|
|||||||
// EC loading for incomplete/missing shards on next startup
|
// EC loading for incomplete/missing shards on next startup
|
||||||
removeFile(indexBaseFileName+".ecx", "EC index file")
|
removeFile(indexBaseFileName+".ecx", "EC index file")
|
||||||
removeFile(indexBaseFileName+".ecj", "EC journal file")
|
removeFile(indexBaseFileName+".ecj", "EC journal file")
|
||||||
|
// Also try the data directory in case .ecx/.ecj were created before -dir.idx was configured
|
||||||
|
if l.IdxDirectory != l.Directory {
|
||||||
|
removeFile(baseFileName+".ecx", "EC index file (fallback)")
|
||||||
|
removeFile(baseFileName+".ecj", "EC journal file (fallback)")
|
||||||
|
}
|
||||||
|
|
||||||
// Remove all EC shard files (.ec00 ~ .ec31) from data directory
|
// Remove all EC shard files (.ec00 ~ .ec31) from data directory
|
||||||
// Use MaxShardCount (32) to support custom EC ratios
|
// Use MaxShardCount (32) to support custom EC ratios
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ type EcVolume struct {
|
|||||||
Collection string
|
Collection string
|
||||||
dir string
|
dir string
|
||||||
dirIdx string
|
dirIdx string
|
||||||
|
ecxActualDir string // directory where .ecx/.ecj were actually found (may differ from dirIdx after fallback)
|
||||||
ecxFile *os.File
|
ecxFile *os.File
|
||||||
ecxFileSize int64
|
ecxFileSize int64
|
||||||
ecxCreatedAt time.Time
|
ecxCreatedAt time.Time
|
||||||
@@ -51,8 +52,20 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection
|
|||||||
indexBaseFileName := EcShardFileName(collection, dirIdx, int(vid))
|
indexBaseFileName := EcShardFileName(collection, dirIdx, int(vid))
|
||||||
|
|
||||||
// open ecx file
|
// open ecx file
|
||||||
|
ev.ecxActualDir = dirIdx
|
||||||
if ev.ecxFile, err = os.OpenFile(indexBaseFileName+".ecx", os.O_RDWR, 0644); err != nil {
|
if ev.ecxFile, err = os.OpenFile(indexBaseFileName+".ecx", os.O_RDWR, 0644); err != nil {
|
||||||
return nil, fmt.Errorf("cannot open ec volume index %s.ecx: %v", indexBaseFileName, err)
|
if dirIdx != dir && os.IsNotExist(err) {
|
||||||
|
// fall back to data directory if idx directory does not have the .ecx file
|
||||||
|
firstErr := err
|
||||||
|
glog.V(1).Infof("ecx file not found at %s.ecx, falling back to %s.ecx", indexBaseFileName, dataBaseFileName)
|
||||||
|
if ev.ecxFile, err = os.OpenFile(dataBaseFileName+".ecx", os.O_RDWR, 0644); err != nil {
|
||||||
|
return nil, fmt.Errorf("open ecx index %s.ecx: %v; fallback %s.ecx: %v", indexBaseFileName, firstErr, dataBaseFileName, err)
|
||||||
|
}
|
||||||
|
indexBaseFileName = dataBaseFileName
|
||||||
|
ev.ecxActualDir = dir
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("cannot open ec volume index %s.ecx: %v", indexBaseFileName, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ecxFi, statErr := ev.ecxFile.Stat()
|
ecxFi, statErr := ev.ecxFile.Stat()
|
||||||
if statErr != nil {
|
if statErr != nil {
|
||||||
@@ -197,7 +210,7 @@ func (ev *EcVolume) Destroy() {
|
|||||||
func (ev *EcVolume) FileName(ext string) string {
|
func (ev *EcVolume) FileName(ext string) string {
|
||||||
switch ext {
|
switch ext {
|
||||||
case ".ecx", ".ecj":
|
case ".ecx", ".ecj":
|
||||||
return ev.IndexBaseFileName() + ext
|
return EcShardFileName(ev.Collection, ev.ecxActualDir, int(ev.VolumeId)) + ext
|
||||||
}
|
}
|
||||||
// .vif
|
// .vif
|
||||||
return ev.DataBaseFileName() + ext
|
return ev.DataBaseFileName() + ext
|
||||||
|
|||||||
Reference in New Issue
Block a user