optimiz commitig compact (#3388)

* optimiz vacuuming volume

* fix bugx

* rename parameters

* fix conflict

* change copyDataBasedOnIndexFile to an instance method

* close needlemap

* optimiz commiting Vacuum volume for  leveldb index

* fix bugs

* fix leveldb loading bugs

* refactor

* fix leveldb loading bug

* add leveldb recovery

* add test case for levelDB

* modify test case to cover all the new branches

* use one tmpNm instead of two instances

* refactor

* refactor

* move setWatermark to the end

* add test for watermark and updating leveldb

* fix error logic

* refactor, add test

* check nil before close needlemapeer
add test case
fix metric bug

* add tests, fix bugs

* adjust log level
remove wrong test case
refactor

* avoid duplicate  updating metric for leveldb index
This commit is contained in:
Guo Lei
2022-08-24 14:53:35 +08:00
committed by GitHub
parent 10414fd81c
commit c57c79a0ab
8 changed files with 304 additions and 37 deletions

View File

@@ -13,6 +13,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
. "github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
)
@@ -88,7 +89,7 @@ func (v *Volume) Compact2(preallocate int64, compactionBytePerSecond int64, prog
if err := v.nm.Sync(); err != nil {
glog.V(0).Infof("compact2 fail to sync volume idx %d: %v", v.Id, err)
}
return copyDataBasedOnIndexFile(
return v.copyDataBasedOnIndexFile(
v.FileName(".dat"), v.FileName(".idx"),
v.FileName(".cpd"), v.FileName(".cpx"),
v.SuperBlock,
@@ -114,7 +115,10 @@ func (v *Volume) CommitCompact() error {
defer v.dataFileAccessLock.Unlock()
glog.V(3).Infof("Got volume %d committing lock...", v.Id)
v.nm.Close()
if v.nm != nil {
v.nm.Close()
v.nm = nil
}
if v.DataBackend != nil {
if err := v.DataBackend.Close(); err != nil {
glog.V(0).Infof("fail to close volume %d", v.Id)
@@ -163,6 +167,7 @@ func (v *Volume) CommitCompact() error {
if e = v.load(true, false, v.needleMapKind, 0); e != nil {
return e
}
glog.V(3).Infof("Finish commiting volume %d", v.Id)
return nil
}
@@ -171,12 +176,16 @@ func (v *Volume) cleanupCompact() error {
e1 := os.Remove(v.FileName(".cpd"))
e2 := os.Remove(v.FileName(".cpx"))
e3 := os.RemoveAll(v.FileName(".cpldb"))
if e1 != nil && !os.IsNotExist(e1) {
return e1
}
if e2 != nil && !os.IsNotExist(e2) {
return e2
}
if e3 != nil && !os.IsNotExist(e3) {
return e3
}
return nil
}
@@ -210,7 +219,15 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
return fmt.Errorf("verifyIndexFileIntegrity %s failed: %v", oldIdxFileName, err)
}
if indexSize == 0 || uint64(indexSize) <= v.lastCompactIndexOffset {
return nil
if v.needleMapKind == NeedleMapInMemory {
return nil
}
newIdx, err := os.OpenFile(newIdxFileName, os.O_RDWR, 0644)
if err != nil {
return fmt.Errorf("open idx file %s failed: %v", newIdxFileName, err)
}
defer newIdx.Close()
return v.tmpNm.UpdateNeedleMapMetric(newIdx)
}
// fail if the old .dat file has changed to a new revision
@@ -261,7 +278,13 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
if idx, err = os.OpenFile(newIdxFileName, os.O_RDWR, 0644); err != nil {
return fmt.Errorf("open idx file %s failed: %v", newIdxFileName, err)
}
defer idx.Close()
stat, err := idx.Stat()
if err != nil {
return fmt.Errorf("stat file %s: %v", idx.Name(), err)
}
idxSize := stat.Size()
var newDatCompactRevision uint16
newDatCompactRevision, err = fetchCompactRevisionFromDatFile(dstDatBackend)
@@ -289,7 +312,6 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
return
}
}
//updated needle
if !increIdxEntry.offset.IsZero() && increIdxEntry.size != 0 && increIdxEntry.size.IsValid() {
//even the needle cache in memory is hit, the need_bytes is correct
@@ -327,7 +349,7 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
}
}
return nil
return v.tmpNm.DoOffsetLoading(v, idx, uint64(idxSize)/types.NeedleMapEntrySize)
}
type VolumeFileScanner4Vacuum struct {
@@ -400,7 +422,7 @@ func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, prealloca
return nm.SaveToIdx(idxName)
}
func copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, datIdxName string, sb super_block.SuperBlock, version needle.Version, preallocate, compactionBytePerSecond int64, progressFn ProgressFunc) (err error) {
func (v *Volume) copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, datIdxName string, sb super_block.SuperBlock, version needle.Version, preallocate, compactionBytePerSecond int64, progressFn ProgressFunc) (err error) {
var (
srcDatBackend, dstDatBackend backend.BackendStorageFile
dataFile *os.File
@@ -430,7 +452,6 @@ func copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, datIdxName str
newOffset := int64(sb.BlockSize())
writeThrottler := util.NewWriteThrottler(compactionBytePerSecond)
err = oldNm.AscendingVisit(func(value needle_map.NeedleValue) error {
offset, size := value.Offset, value.Size
@@ -471,6 +492,41 @@ func copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, datIdxName str
return err
}
return newNm.SaveToIdx(datIdxName)
err = newNm.SaveToIdx(datIdxName)
if err != nil {
return err
}
indexFile, err := os.OpenFile(datIdxName, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
glog.Errorf("cannot open Volume Index %s: %v", datIdxName, err)
return err
}
defer indexFile.Close()
if v.tmpNm != nil {
v.tmpNm.Close()
v.tmpNm = nil
}
if v.needleMapKind == NeedleMapInMemory {
nm := &NeedleMap{
m: needle_map.NewCompactMap(),
}
v.tmpNm = nm
//can be optimized, filling nm in oldNm.AscendingVisit
err = v.tmpNm.DoOffsetLoading(nil, indexFile, 0)
return err
} else {
dbFileName := v.FileName(".ldb")
m := &LevelDbNeedleMap{dbFileName: dbFileName}
m.dbFileName = dbFileName
mm := &mapMetric{}
m.mapMetric = *mm
v.tmpNm = m
err = v.tmpNm.DoOffsetLoading(v, indexFile, 0)
if err != nil {
return err
}
}
return
}