Cache visible intervals for reads

Speeds up 4x in single-threaded mode.
Speeds up 30% in 32-thread mode.
This commit is contained in:
Chris Lu
2018-12-30 00:51:44 -08:00
parent 55f7d8f801
commit 0388d421d2
5 changed files with 54 additions and 37 deletions

View File

@@ -33,7 +33,7 @@ func ETag(chunks []*filer_pb.FileChunk) (etag string) {
func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
visibles := nonOverlappingVisibleIntervals(chunks)
visibles := NonOverlappingVisibleIntervals(chunks)
fileIds := make(map[string]bool)
for _, interval := range visibles {
@@ -75,7 +75,13 @@ type ChunkView struct {
func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) {
visibles := nonOverlappingVisibleIntervals(chunks)
visibles := NonOverlappingVisibleIntervals(chunks)
return ViewFromVisibleIntervals(visibles, offset, size)
}
func ViewFromVisibleIntervals(visibles []*VisibleInterval, offset int64, size int) (views []*ChunkView) {
stop := offset + int64(size)
@@ -97,7 +103,7 @@ func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views
}
func logPrintf(name string, visibles []*visibleInterval) {
func logPrintf(name string, visibles []*VisibleInterval) {
/*
log.Printf("%s len %d", name, len(visibles))
for _, v := range visibles {
@@ -108,11 +114,11 @@ func logPrintf(name string, visibles []*visibleInterval) {
var bufPool = sync.Pool{
New: func() interface{} {
return new(visibleInterval)
return new(VisibleInterval)
},
}
func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb.FileChunk) []*visibleInterval {
func mergeIntoVisibles(visibles, newVisibles []*VisibleInterval, chunk *filer_pb.FileChunk) []*VisibleInterval {
newV := newVisibleInterval(
chunk.Offset,
@@ -173,13 +179,13 @@ func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb
return newVisibles
}
func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) {
func NonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*VisibleInterval) {
sort.Slice(chunks, func(i, j int) bool {
return chunks[i].Mtime < chunks[j].Mtime
})
var newVislbles []*visibleInterval
var newVislbles []*VisibleInterval
for _, chunk := range chunks {
newVislbles = mergeIntoVisibles(visibles, newVislbles, chunk)
t := visibles[:0]
@@ -196,7 +202,7 @@ func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*v
// find non-overlapping visible intervals
// visible interval map to one file chunk
type visibleInterval struct {
type VisibleInterval struct {
start int64
stop int64
modifiedTime int64
@@ -204,8 +210,8 @@ type visibleInterval struct {
isFullChunk bool
}
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool) *visibleInterval {
return &visibleInterval{
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool) *VisibleInterval {
return &VisibleInterval{
start: start,
stop: stop,
fileId: fileId,