Feature limit caching to prescribed number of bytes per file (#6009)

* feature: we can check if a fileId is already in the cache

We using this to protect cache from adding the same needle to
the cache over and over.

* fuse mount: Do not start dowloader if needle is already in the cache

* added maxFilePartSizeInCache property to ChunkCache

If file very large only first maxFilePartSizeInCache bytes
are going to be put to the cache (subject to the needle size
constrains).

* feature: for large files put in cache no more than prescribed number of bytes

Before this patch only the first needle of a large file was intended for
caching. This patch uses maximum prescribed amount of bytes to be put in
cache. This allows to bypass default 2MB maximum for a file part stored
in the cache.

* added dummy mock methods to satisfy interfaces of ChunkCache
This commit is contained in:
Eugeniy E. Mikhailov
2024-09-12 00:09:20 -04:00
committed by GitHub
parent 151f2ff7a9
commit dab0bb8097
4 changed files with 61 additions and 2 deletions

View File

@@ -6,6 +6,7 @@ import (
"sync/atomic"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"github.com/seaweedfs/seaweedfs/weed/util/mem"
@@ -61,6 +62,10 @@ func (rc *ReaderCache) MaybeCache(chunkViews *Interval[*ChunkView]) {
if _, found := rc.downloaders[chunkView.FileId]; found {
continue
}
if rc.chunkCache.IsInCache(chunkView.FileId, true) {
glog.V(4).Infof("%s is in cache", chunkView.FileId)
continue
}
if len(rc.downloaders) >= rc.limit {
// abort when slots are filled
@@ -69,7 +74,7 @@ func (rc *ReaderCache) MaybeCache(chunkViews *Interval[*ChunkView]) {
// glog.V(4).Infof("prefetch %s offset %d", chunkView.FileId, chunkView.ViewOffset)
// cache this chunk if not yet
cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), chunkView.ViewOffset == 0)
cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), (uint64(chunkView.ViewOffset)+chunkView.ChunkSize) <= rc.chunkCache.GetMaxFilePartSizeInCache())
go cacher.startCaching()
<-cacher.cacheStartedCh
rc.downloaders[chunkView.FileId] = cacher