more solid weed mount (#4089)
* compare chunks by timestamp
* fix slab clearing error
* fix test compilation
* move oldest chunk to sealed, instead of by fullness
* lock on fh.entryViewCache
* remove verbose logs
* revert slab clearing
* less logs
* less logs
* track write and read by timestamp
* remove useless logic
* add entry lock on file handle release
* use mem chunk only, swap file chunk has problems
* comment out code that may be used later
* add debug mode to compare data read and write
* more efficient readResolvedChunks with linked list
* small optimization
* fix test compilation
* minor fix on writer
* add SeparateGarbageChunks
* group chunks into sections
* turn off debug mode
* fix tests
* fix tests
* tmp enable swap file chunk
* Revert "tmp enable swap file chunk"

  This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7.

* simple refactoring
* simple refactoring
* do not re-use swap file chunk; sealed chunks should not be re-used
* comment out debugging facilities
* either mem chunk or swap file chunk is fine now
* remove orderedMutex (*semaphore.Weighted) as it was not found impactful
* optimize size calculation for changing large files
* optimize performance to avoid going through the long list of chunks
* still problems with swap file chunk
* rename
* tiny optimization
* swap file chunk saves only successfully read data
* fix
* enable both mem and swap file chunk
* resolve chunks with range
* rename
* fix chunk interval list
* also change file handle chunk group when adding chunks
* pick inactive chunk with time-decayed counter
* fix compilation
* avoid nil with empty fh.entry
* refactoring
* rename
* rename
* refactor visible intervals to *list.List
* refactor chunkViews to *list.List
* add IntervalList for generic interval list
* change visible interval to use IntervalList in generics
* change chunkViews to *IntervalList[*ChunkView]
* use NewFileChunkSection to create
* rename variables
* refactor
* fix renaming leftover
* renaming
* renaming
* add insert interval
* interval list adds lock
* incrementally add chunks to readers

  Fixes:
  1. set start and stop offset for the value object
  2. clone the value object
  3. use pointer instead of copy-by-value when passing to interval.Value
  4. use insert interval since adding chunk could be out of order

* fix tests compilation
* fix tests compilation
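Note: the hunks below replace the slice-based chunkViews with the generic interval linked list mentioned above and iterate it as for x := chunkViews.Front(); x != nil; x = x.Next, reading x.Value. Below is a minimal sketch of that traversal pattern only, under the assumption of a bare singly linked node type; the real IntervalList[*ChunkView] in this PR also carries interval offsets, ordered insertion, and a lock, which are omitted here.

package main

import "fmt"

// Interval is a sketch of a linked interval node, mirroring the
// Front()/Next/Value shape used by the loops in the diff below.
type Interval[T any] struct {
    Value T
    Next  *Interval[T]
}

type IntervalList[T any] struct {
    head *Interval[T]
}

// Front returns the first interval, or nil for an empty list.
func (l *IntervalList[T]) Front() *Interval[T] { return l.head }

// PushBack appends a value; the real list inserts in offset order instead.
func (l *IntervalList[T]) PushBack(v T) {
    n := &Interval[T]{Value: v}
    if l.head == nil {
        l.head = n
        return
    }
    x := l.head
    for x.Next != nil {
        x = x.Next
    }
    x.Next = n
}

func main() {
    var sizes IntervalList[int64]
    sizes.PushBack(8)
    sizes.PushBack(4)

    // Same loop shape as the diff: for x := list.Front(); x != nil; x = x.Next { ... }
    var total int64
    for x := sizes.Front(); x != nil; x = x.Next {
        total += x.Value
    }
    fmt.Println(total) // 12
}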
@@ -6,7 +6,6 @@ import (
 	"golang.org/x/exp/slices"
 	"io"
 	"math"
-	"sort"
 	"strings"
 	"sync"
 	"time"
@@ -78,7 +77,8 @@ func StreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, w
 
 	fileId2Url := make(map[string][]string)
 
-	for _, chunkView := range chunkViews {
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunkView := x.Value
 		var urlStrings []string
 		var err error
 		for _, backoff := range getLookupFileIdBackoffSchedule {
@@ -102,29 +102,30 @@ func StreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, w
 
 	downloadThrottler := util.NewWriteThrottler(downloadMaxBytesPs)
 	remaining := size
-	for _, chunkView := range chunkViews {
-		if offset < chunkView.LogicOffset {
-			gap := chunkView.LogicOffset - offset
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunkView := x.Value
+		if offset < chunkView.ViewOffset {
+			gap := chunkView.ViewOffset - offset
 			remaining -= gap
-			glog.V(4).Infof("zero [%d,%d)", offset, chunkView.LogicOffset)
+			glog.V(4).Infof("zero [%d,%d)", offset, chunkView.ViewOffset)
 			err := writeZero(writer, gap)
 			if err != nil {
-				return fmt.Errorf("write zero [%d,%d)", offset, chunkView.LogicOffset)
+				return fmt.Errorf("write zero [%d,%d)", offset, chunkView.ViewOffset)
 			}
-			offset = chunkView.LogicOffset
+			offset = chunkView.ViewOffset
 		}
 		urlStrings := fileId2Url[chunkView.FileId]
 		start := time.Now()
-		err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
-		offset += int64(chunkView.Size)
-		remaining -= int64(chunkView.Size)
+		err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
+		offset += int64(chunkView.ViewSize)
+		remaining -= int64(chunkView.ViewSize)
 		stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
 		if err != nil {
 			stats.FilerRequestCounter.WithLabelValues("chunkDownloadError").Inc()
 			return fmt.Errorf("read chunk: %v", err)
 		}
 		stats.FilerRequestCounter.WithLabelValues("chunkDownload").Inc()
-		downloadThrottler.MaybeSlowdown(int64(chunkView.Size))
+		downloadThrottler.MaybeSlowdown(int64(chunkView.ViewSize))
 	}
 	if remaining > 0 {
 		glog.V(4).Infof("zero [%d,%d)", offset, offset+remaining)
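Note: the renames above track the new ChunkView field names: ViewOffset appears to be the view's position in the logical file (old LogicOffset), OffsetInChunk the position inside the stored chunk (old Offset), and ViewSize the number of bytes visible (old Size). A small sketch of the gap/zero-fill arithmetic in the loop above, using a stand-in struct with those field names; the actual ChunkView is defined elsewhere in the PR:

package main

import "fmt"

// view is a stand-in for the renamed ChunkView fields as the hunk above uses them.
type view struct {
    ViewOffset    int64  // where this view starts in the logical file
    OffsetInChunk int64  // where the view starts inside the stored chunk
    ViewSize      uint64 // how many bytes of the chunk are visible
}

func main() {
    offset := int64(100)
    v := view{ViewOffset: 150, OffsetInChunk: 0, ViewSize: 64}

    // Mirrors the streaming loop: zero-fill any gap, then advance past the view.
    if offset < v.ViewOffset {
        gap := v.ViewOffset - offset // 50 zero bytes would be written here
        fmt.Println("zero-fill", gap, "bytes")
        offset = v.ViewOffset
    }
    offset += int64(v.ViewSize)
    fmt.Println("next offset", offset) // 214
}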
@@ -167,14 +168,15 @@ func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer
 
 	idx := 0
 
-	for _, chunkView := range chunkViews {
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunkView := x.Value
 		urlStrings, err := lookupFileIdFn(chunkView.FileId)
 		if err != nil {
 			glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
 			return err
 		}
 
-		n, err := retriedFetchChunkData(buffer[idx:idx+int(chunkView.Size)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset)
+		n, err := retriedFetchChunkData(buffer[idx:idx+int(chunkView.ViewSize)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk)
 		if err != nil {
 			return err
 		}
@@ -185,7 +187,7 @@ func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer
 
 // ---------------- ChunkStreamReader ----------------------------------
 type ChunkStreamReader struct {
-	chunkViews  []*ChunkView
+	chunkView   *Interval[*ChunkView]
 	totalSize   int64
 	logicOffset int64
 	buffer      []byte
@@ -201,17 +203,15 @@ var _ = io.ReaderAt(&ChunkStreamReader{})
 func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
 
 	chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
-	slices.SortFunc(chunkViews, func(a, b *ChunkView) bool {
-		return a.LogicOffset < b.LogicOffset
-	})
 
 	var totalSize int64
-	for _, chunk := range chunkViews {
-		totalSize += int64(chunk.Size)
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunk := x.Value
+		totalSize += int64(chunk.ViewSize)
 	}
 
 	return &ChunkStreamReader{
-		chunkViews:   chunkViews,
+		chunkView:    chunkViews.Front(),
 		lookupFileId: lookupFileIdFn,
 		totalSize:    totalSize,
 	}
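Note: the slices.SortFunc call disappears because the interval list is expected to already be in offset order; per the commit message, chunks are added through an insert-interval operation since they can arrive out of order. A minimal sketch of ordered insertion into a linked list under that assumption; the real IntervalList.InsertInterval also has to split or replace overlapping intervals, which is not shown here:

package main

import "fmt"

type node struct {
    offset int64
    next   *node
}

// insertSorted keeps the list ordered at insert time, so readers can simply
// walk Front()/Next without sorting first.
func insertSorted(head *node, n *node) *node {
    if head == nil || n.offset < head.offset {
        n.next = head
        return n
    }
    cur := head
    for cur.next != nil && cur.next.offset <= n.offset {
        cur = cur.next
    }
    n.next = cur.next
    cur.next = n
    return head
}

func main() {
    var head *node
    for _, off := range []int64{64, 0, 32} { // chunks may arrive out of order
        head = insertSorted(head, &node{offset: off})
    }
    for x := head; x != nil; x = x.next {
        fmt.Println(x.offset) // 0, 32, 64
    }
}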
@@ -290,7 +290,7 @@ func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
 }
 
 func insideChunk(offset int64, chunk *ChunkView) bool {
-	return chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size)
+	return chunk.ViewOffset <= offset && offset < chunk.ViewOffset+int64(chunk.ViewSize)
 }
 
 func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
@@ -300,48 +300,22 @@ func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
 	}
 
 	// fmt.Printf("fetch for offset %d\n", offset)
-
-	// need to seek to a different chunk
-	currentChunkIndex := sort.Search(len(c.chunkViews), func(i int) bool {
-		return offset < c.chunkViews[i].LogicOffset
-	})
-	if currentChunkIndex == len(c.chunkViews) {
-		// not found
-		if insideChunk(offset, c.chunkViews[0]) {
-			// fmt.Printf("select0 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
-			currentChunkIndex = 0
-		} else if insideChunk(offset, c.chunkViews[len(c.chunkViews)-1]) {
-			currentChunkIndex = len(c.chunkViews) - 1
-			// fmt.Printf("select last chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
-		} else {
-			return io.EOF
-		}
-	} else if currentChunkIndex > 0 {
-		if insideChunk(offset, c.chunkViews[currentChunkIndex]) {
-			// good hit
-		} else if insideChunk(offset, c.chunkViews[currentChunkIndex-1]) {
-			currentChunkIndex -= 1
-			// fmt.Printf("select -1 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
-		} else {
-			// glog.Fatalf("unexpected1 offset %d", offset)
-			return fmt.Errorf("unexpected1 offset %d", offset)
-		}
-	} else {
-		// glog.Fatalf("unexpected2 offset %d", offset)
-		return fmt.Errorf("unexpected2 offset %d", offset)
+	c.chunkView = c.chunkView.Next
+	if c.chunkView == nil {
+		return io.EOF
 	}
 
 	// positioning within the new chunk
-	chunk := c.chunkViews[currentChunkIndex]
+	chunk := c.chunkView.Value
 	if insideChunk(offset, chunk) {
-		if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset {
+		if c.isBufferEmpty() || c.bufferOffset != chunk.ViewOffset {
 			if err = c.fetchChunkToBuffer(chunk); err != nil {
 				return
 			}
 		}
 	} else {
-		// glog.Fatalf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
-		return fmt.Errorf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
+		// glog.Fatalf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
+		return fmt.Errorf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
 	}
 	return
 }
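Note: the old prepareBufferFor binary-searched the chunkViews slice with sort.Search on every miss; the new version keeps a cursor and moves c.chunkView one node forward, which matches the commit message's aim of not walking the long chunk list for sequential reads. A rough sketch of that cursor-style lookup; unlike the real code above, which advances exactly one node and reports an unexpected offset otherwise, this sketch generalizes to a forward scan:

package main

import (
    "errors"
    "fmt"
)

type iv struct {
    start, size int64
    next        *iv
}

// advance mimics the forward-only navigation: if the wanted offset is not in
// the current interval, step the cursor forward; a nil cursor means EOF.
func advance(cur *iv, offset int64) (*iv, error) {
    for cur != nil {
        if cur.start <= offset && offset < cur.start+cur.size {
            return cur, nil
        }
        cur = cur.next
    }
    return nil, errors.New("EOF")
}

func main() {
    c := &iv{start: 0, size: 10, next: &iv{start: 10, size: 10}}
    cur, _ := advance(c, 12)
    fmt.Println(cur.start) // 10
    _, err := advance(cur, 25)
    fmt.Println(err) // EOF
}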
@@ -355,7 +329,7 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
 	var buffer bytes.Buffer
 	var shouldRetry bool
 	for _, urlString := range urlStrings {
-		shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
+		shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize), func(data []byte) {
 			buffer.Write(data)
 		})
 		if !shouldRetry {
@@ -372,10 +346,10 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
 		return err
 	}
 	c.buffer = buffer.Bytes()
-	c.bufferOffset = chunkView.LogicOffset
+	c.bufferOffset = chunkView.ViewOffset
 	c.chunk = chunkView.FileId
 
-	// glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
+	// glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.ViewOffset, chunkView.ViewOffset+int64(chunkView.ViewSize))
 
 	return nil
 }