more solid weed mount (#4089)

* compare chunks by timestamp

* fix slab clearing error

* fix test compilation

* move oldest chunk to sealed, instead of by fullness

* lock on fh.entryViewCache

* remove verbose logs

* revert slat clearing

* less logs

* less logs

* track write and read by timestamp

* remove useless logic

* add entry lock on file handle release

* use mem chunk only, swap file chunk has problems

* comment out code that maybe used later

* add debug mode to compare data read and write

* more efficient readResolvedChunks with linked list

* small optimization

* fix test compilation

* minor fix on writer

* add SeparateGarbageChunks

* group chunks into sections

* turn off debug mode

* fix tests

* fix tests

* tmp enable swap file chunk

* Revert "tmp enable swap file chunk"

This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7.

* simple refactoring

* simple refactoring

* do not re-use swap file chunk. Sealed chunks should not be re-used.

* comment out debugging facilities

* either mem chunk or swap file chunk is fine now

* remove orderedMutex  as *semaphore.Weighted

not found impactful

* optimize size calculation for changing large files

* optimize performance to avoid going through the long list of chunks

* still problems with swap file chunk

* rename

* tiny optimization

* swap file chunk save only successfully read data

* fix

* enable both mem and swap file chunk

* resolve chunks with range

* rename

* fix chunk interval list

* also change file handle chunk group when adding chunks

* pick in-active chunk with time-decayed counter

* fix compilation

* avoid nil with empty fh.entry

* refactoring

* rename

* rename

* refactor visible intervals to *list.List

* refactor chunkViews to *list.List

* add IntervalList for generic interval list

* change visible interval to use IntervalList in generics

* cahnge chunkViews to *IntervalList[*ChunkView]

* use NewFileChunkSection to create

* rename variables

* refactor

* fix renaming leftover

* renaming

* renaming

* add insert interval

* interval list adds lock

* incrementally add chunks to readers

Fixes:
1. set start and stop offset for the value object
2. clone the value object
3. use pointer instead of copy-by-value when passing to interval.Value
4. use insert interval since adding chunk could be out of order

* fix tests compilation

* fix tests compilation
This commit is contained in:
Chris Lu
2023-01-02 23:20:45 -08:00
committed by GitHub
parent 367353b936
commit d4566d4aaa
45 changed files with 1834 additions and 805 deletions

View File

@@ -15,12 +15,12 @@ var (
type ActualChunkIndex int
type SwapFile struct {
dir string
file *os.File
logicToActualChunkIndex map[LogicChunkIndex]ActualChunkIndex
logicToActualChunkIndexLock sync.Mutex
chunkSize int64
freeActualChunkList []ActualChunkIndex
dir string
file *os.File
chunkSize int64
chunkTrackingLock sync.Mutex
activeChunkCount int
freeActualChunkList []ActualChunkIndex
}
type SwapFileChunk struct {
@@ -29,14 +29,15 @@ type SwapFileChunk struct {
usage *ChunkWrittenIntervalList
logicChunkIndex LogicChunkIndex
actualChunkIndex ActualChunkIndex
activityScore *ActivityScore
//memChunk *MemChunk
}
func NewSwapFile(dir string, chunkSize int64) *SwapFile {
return &SwapFile{
dir: dir,
file: nil,
logicToActualChunkIndex: make(map[LogicChunkIndex]ActualChunkIndex),
chunkSize: chunkSize,
dir: dir,
file: nil,
chunkSize: chunkSize,
}
}
func (sf *SwapFile) FreeResource() {
@@ -46,7 +47,7 @@ func (sf *SwapFile) FreeResource() {
}
}
func (sf *SwapFile) NewTempFileChunk(logicChunkIndex LogicChunkIndex) (tc *SwapFileChunk) {
func (sf *SwapFile) NewSwapFileChunk(logicChunkIndex LogicChunkIndex) (tc *SwapFileChunk) {
if sf.file == nil {
var err error
sf.file, err = os.CreateTemp(sf.dir, "")
@@ -55,70 +56,98 @@ func (sf *SwapFile) NewTempFileChunk(logicChunkIndex LogicChunkIndex) (tc *SwapF
return nil
}
}
sf.logicToActualChunkIndexLock.Lock()
defer sf.logicToActualChunkIndexLock.Unlock()
actualChunkIndex, found := sf.logicToActualChunkIndex[logicChunkIndex]
if !found {
if len(sf.freeActualChunkList) > 0 {
actualChunkIndex = sf.freeActualChunkList[0]
sf.freeActualChunkList = sf.freeActualChunkList[1:]
} else {
actualChunkIndex = ActualChunkIndex(len(sf.logicToActualChunkIndex))
}
sf.logicToActualChunkIndex[logicChunkIndex] = actualChunkIndex
sf.chunkTrackingLock.Lock()
defer sf.chunkTrackingLock.Unlock()
sf.activeChunkCount++
// assign a new physical chunk
var actualChunkIndex ActualChunkIndex
if len(sf.freeActualChunkList) > 0 {
actualChunkIndex = sf.freeActualChunkList[0]
sf.freeActualChunkList = sf.freeActualChunkList[1:]
} else {
actualChunkIndex = ActualChunkIndex(sf.activeChunkCount)
}
return &SwapFileChunk{
swapFileChunk := &SwapFileChunk{
swapfile: sf,
usage: newChunkWrittenIntervalList(),
logicChunkIndex: logicChunkIndex,
actualChunkIndex: actualChunkIndex,
activityScore: NewActivityScore(),
// memChunk: NewMemChunk(logicChunkIndex, sf.chunkSize),
}
// println(logicChunkIndex, "|", "++++", swapFileChunk.actualChunkIndex, swapFileChunk, sf)
return swapFileChunk
}
func (sc *SwapFileChunk) FreeResource() {
sc.swapfile.logicToActualChunkIndexLock.Lock()
defer sc.swapfile.logicToActualChunkIndexLock.Unlock()
sc.Lock()
defer sc.Unlock()
sc.swapfile.chunkTrackingLock.Lock()
defer sc.swapfile.chunkTrackingLock.Unlock()
sc.swapfile.freeActualChunkList = append(sc.swapfile.freeActualChunkList, sc.actualChunkIndex)
delete(sc.swapfile.logicToActualChunkIndex, sc.logicChunkIndex)
sc.swapfile.activeChunkCount--
// println(sc.logicChunkIndex, "|", "----", sc.actualChunkIndex, sc, sc.swapfile)
}
func (sc *SwapFileChunk) WriteDataAt(src []byte, offset int64) (n int) {
func (sc *SwapFileChunk) WriteDataAt(src []byte, offset int64, tsNs int64) (n int) {
sc.Lock()
defer sc.Unlock()
// println(sc.logicChunkIndex, "|", tsNs, "write at", offset, len(src), sc.actualChunkIndex)
innerOffset := offset % sc.swapfile.chunkSize
var err error
n, err = sc.swapfile.file.WriteAt(src, int64(sc.actualChunkIndex)*sc.swapfile.chunkSize+innerOffset)
if err == nil {
sc.usage.MarkWritten(innerOffset, innerOffset+int64(n))
} else {
sc.usage.MarkWritten(innerOffset, innerOffset+int64(n), tsNs)
if err != nil {
glog.Errorf("failed to write swap file %s: %v", sc.swapfile.file.Name(), err)
}
//sc.memChunk.WriteDataAt(src, offset, tsNs)
sc.activityScore.MarkWrite()
return
}
func (sc *SwapFileChunk) ReadDataAt(p []byte, off int64) (maxStop int64) {
func (sc *SwapFileChunk) ReadDataAt(p []byte, off int64, tsNs int64) (maxStop int64) {
sc.RLock()
defer sc.RUnlock()
// println(sc.logicChunkIndex, "|", tsNs, "read at", off, len(p), sc.actualChunkIndex)
//memCopy := make([]byte, len(p))
//copy(memCopy, p)
chunkStartOffset := int64(sc.logicChunkIndex) * sc.swapfile.chunkSize
for t := sc.usage.head.next; t != sc.usage.tail; t = t.next {
logicStart := max(off, chunkStartOffset+t.StartOffset)
logicStop := min(off+int64(len(p)), chunkStartOffset+t.stopOffset)
if logicStart < logicStop {
actualStart := logicStart - chunkStartOffset + int64(sc.actualChunkIndex)*sc.swapfile.chunkSize
if _, err := sc.swapfile.file.ReadAt(p[logicStart-off:logicStop-off], actualStart); err != nil {
glog.Errorf("failed to reading swap file %s: %v", sc.swapfile.file.Name(), err)
break
if t.TsNs >= tsNs {
actualStart := logicStart - chunkStartOffset + int64(sc.actualChunkIndex)*sc.swapfile.chunkSize
if _, err := sc.swapfile.file.ReadAt(p[logicStart-off:logicStop-off], actualStart); err != nil {
glog.Errorf("failed to reading swap file %s: %v", sc.swapfile.file.Name(), err)
break
}
maxStop = max(maxStop, logicStop)
} else {
println("read old data2", tsNs-t.TsNs, "ns")
}
maxStop = max(maxStop, logicStop)
}
}
//sc.memChunk.ReadDataAt(memCopy, off, tsNs)
//if bytes.Compare(memCopy, p) != 0 {
// println("read wrong data from swap file", off, sc.logicChunkIndex)
//}
sc.activityScore.MarkRead()
return
}
@@ -128,27 +157,27 @@ func (sc *SwapFileChunk) IsComplete() bool {
return sc.usage.IsComplete(sc.swapfile.chunkSize)
}
func (sc *SwapFileChunk) WrittenSize() int64 {
sc.RLock()
defer sc.RUnlock()
return sc.usage.WrittenSize()
func (sc *SwapFileChunk) ActivityScore() int64 {
return sc.activityScore.ActivityScore()
}
func (sc *SwapFileChunk) SaveContent(saveFn SaveToStorageFunc) {
sc.RLock()
defer sc.RUnlock()
if saveFn == nil {
return
}
sc.Lock()
defer sc.Unlock()
// println(sc.logicChunkIndex, "|", "save")
for t := sc.usage.head.next; t != sc.usage.tail; t = t.next {
data := mem.Allocate(int(t.Size()))
sc.swapfile.file.ReadAt(data, t.StartOffset+int64(sc.actualChunkIndex)*sc.swapfile.chunkSize)
reader := util.NewBytesReader(data)
saveFn(reader, int64(sc.logicChunkIndex)*sc.swapfile.chunkSize+t.StartOffset, t.Size(), func() {
})
n, _ := sc.swapfile.file.ReadAt(data, t.StartOffset+int64(sc.actualChunkIndex)*sc.swapfile.chunkSize)
if n > 0 {
reader := util.NewBytesReader(data[:n])
saveFn(reader, int64(sc.logicChunkIndex)*sc.swapfile.chunkSize+t.StartOffset, int64(n), t.TsNs, func() {
})
}
mem.Free(data)
}
sc.usage = newChunkWrittenIntervalList()
}