more solid weed mount (#4089)

* compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2023-01-02 23:20:45 -08:00
parent 367353b936
commit d4566d4aaa
45 changed files with 1834 additions and 805 deletions
--- a/weed/filer/filechunks_test.go
+++ b/weed/filer/filechunks_test.go
@@ -92,7 +92,8 @@ func TestRandomFileChunksCompact(t *testing.T) {

 	visibles, _ := NonOverlappingVisibleIntervals(nil, chunks, 0, math.MaxInt64)

-	for _, v := range visibles {
+	for visible := visibles.Front(); visible != nil; visible = visible.Next {
+		v := visible.Value
 		for x := v.start; x < v.stop; x++ {
 			assert.Equal(t, strconv.Itoa(int(data[x])), v.fileId)
 		}
@@ -137,7 +138,7 @@ func TestIntervalMerging(t *testing.T) {
 			},
 			Expected: []*VisibleInterval{
 				{start: 0, stop: 70, fileId: "b"},
-				{start: 70, stop: 100, fileId: "a", chunkOffset: 70},
+				{start: 70, stop: 100, fileId: "a", offsetInChunk: 70},
 			},
 		},
 		// case 3: updates overwrite full chunks
@@ -174,15 +175,15 @@ func TestIntervalMerging(t *testing.T) {
 			},
 			Expected: []*VisibleInterval{
 				{start: 0, stop: 200, fileId: "d"},
-				{start: 200, stop: 220, fileId: "c", chunkOffset: 130},
+				{start: 200, stop: 220, fileId: "c", offsetInChunk: 130},
 			},
 		},
 		// case 6: same updates
 		{
 			Chunks: []*filer_pb.FileChunk{
 				{Offset: 0, Size: 100, FileId: "abc", Fid: &filer_pb.FileId{FileKey: 1}, ModifiedTsNs: 123},
-				{Offset: 0, Size: 100, FileId: "axf", Fid: &filer_pb.FileId{FileKey: 2}, ModifiedTsNs: 123},
-				{Offset: 0, Size: 100, FileId: "xyz", Fid: &filer_pb.FileId{FileKey: 3}, ModifiedTsNs: 123},
+				{Offset: 0, Size: 100, FileId: "axf", Fid: &filer_pb.FileId{FileKey: 2}, ModifiedTsNs: 124},
+				{Offset: 0, Size: 100, FileId: "xyz", Fid: &filer_pb.FileId{FileKey: 3}, ModifiedTsNs: 125},
 			},
 			Expected: []*VisibleInterval{
 				{start: 0, stop: 100, fileId: "xyz"},
@@ -228,11 +229,17 @@ func TestIntervalMerging(t *testing.T) {
 	for i, testcase := range testcases {
 		log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i)
 		intervals, _ := NonOverlappingVisibleIntervals(nil, testcase.Chunks, 0, math.MaxInt64)
-		for x, interval := range intervals {
-			log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s",
-				i, x, interval.start, interval.stop, interval.fileId)
+		x := -1
+		for visible := intervals.Front(); visible != nil; visible = visible.Next {
+			x++
+			interval := visible.Value
+			log.Printf("test case %d, interval start=%d, stop=%d, fileId=%s",
+				i, interval.start, interval.stop, interval.fileId)
 		}
-		for x, interval := range intervals {
+		x = -1
+		for visible := intervals.Front(); visible != nil; visible = visible.Next {
+			x++
+			interval := visible.Value
 			if interval.start != testcase.Expected[x].start {
 				t.Fatalf("failed on test case %d, interval %d, start %d, expect %d",
 					i, x, interval.start, testcase.Expected[x].start)
@@ -245,13 +252,13 @@ func TestIntervalMerging(t *testing.T) {
 				t.Fatalf("failed on test case %d, interval %d, chunkId %s, expect %s",
 					i, x, interval.fileId, testcase.Expected[x].fileId)
 			}
-			if interval.chunkOffset != testcase.Expected[x].chunkOffset {
-				t.Fatalf("failed on test case %d, interval %d, chunkOffset %d, expect %d",
-					i, x, interval.chunkOffset, testcase.Expected[x].chunkOffset)
+			if interval.offsetInChunk != testcase.Expected[x].offsetInChunk {
+				t.Fatalf("failed on test case %d, interval %d, offsetInChunk %d, expect %d",
+					i, x, interval.offsetInChunk, testcase.Expected[x].offsetInChunk)
 			}
 		}
-		if len(intervals) != len(testcase.Expected) {
-			t.Fatalf("failed to compact test case %d, len %d expected %d", i, len(intervals), len(testcase.Expected))
+		if intervals.Len() != len(testcase.Expected) {
+			t.Fatalf("failed to compact test case %d, len %d expected %d", i, intervals.Len(), len(testcase.Expected))
 		}

 	}
@@ -276,9 +283,9 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   250,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 100, FileId: "abc", LogicOffset: 0},
-				{Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 100},
-				{Offset: 0, Size: 50, FileId: "fsad", LogicOffset: 200},
+				{OffsetInChunk: 0, ViewSize: 100, FileId: "abc", ViewOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 100, FileId: "asdf", ViewOffset: 100},
+				{OffsetInChunk: 0, ViewSize: 50, FileId: "fsad", ViewOffset: 200},
 			},
 		},
 		// case 1: updates overwrite full chunks
@@ -290,7 +297,7 @@ func TestChunksReading(t *testing.T) {
 			Offset: 50,
 			Size:   100,
 			Expected: []*ChunkView{
-				{Offset: 50, Size: 100, FileId: "asdf", LogicOffset: 50},
+				{OffsetInChunk: 50, ViewSize: 100, FileId: "asdf", ViewOffset: 50},
 			},
 		},
 		// case 2: updates overwrite part of previous chunks
@@ -302,8 +309,8 @@ func TestChunksReading(t *testing.T) {
 			Offset: 30,
 			Size:   40,
 			Expected: []*ChunkView{
-				{Offset: 20, Size: 30, FileId: "b", LogicOffset: 30},
-				{Offset: 57, Size: 10, FileId: "a", LogicOffset: 60},
+				{OffsetInChunk: 20, ViewSize: 30, FileId: "b", ViewOffset: 30},
+				{OffsetInChunk: 57, ViewSize: 10, FileId: "a", ViewOffset: 60},
 			},
 		},
 		// case 3: updates overwrite full chunks
@@ -316,8 +323,8 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   200,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 50, FileId: "asdf", LogicOffset: 0},
-				{Offset: 0, Size: 150, FileId: "xxxx", LogicOffset: 50},
+				{OffsetInChunk: 0, ViewSize: 50, FileId: "asdf", ViewOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 150, FileId: "xxxx", ViewOffset: 50},
 			},
 		},
 		// case 4: updates far away from prev chunks
@@ -330,8 +337,8 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   400,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0},
-				{Offset: 0, Size: 150, FileId: "xxxx", LogicOffset: 250},
+				{OffsetInChunk: 0, ViewSize: 200, FileId: "asdf", ViewOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 150, FileId: "xxxx", ViewOffset: 250},
 			},
 		},
 		// case 5: updates overwrite full chunks
@@ -345,21 +352,21 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   220,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 200, FileId: "c", LogicOffset: 0},
-				{Offset: 130, Size: 20, FileId: "b", LogicOffset: 200},
+				{OffsetInChunk: 0, ViewSize: 200, FileId: "c", ViewOffset: 0},
+				{OffsetInChunk: 130, ViewSize: 20, FileId: "b", ViewOffset: 200},
 			},
 		},
 		// case 6: same updates
 		{
 			Chunks: []*filer_pb.FileChunk{
 				{Offset: 0, Size: 100, FileId: "abc", Fid: &filer_pb.FileId{FileKey: 1}, ModifiedTsNs: 123},
-				{Offset: 0, Size: 100, FileId: "def", Fid: &filer_pb.FileId{FileKey: 2}, ModifiedTsNs: 123},
-				{Offset: 0, Size: 100, FileId: "xyz", Fid: &filer_pb.FileId{FileKey: 3}, ModifiedTsNs: 123},
+				{Offset: 0, Size: 100, FileId: "def", Fid: &filer_pb.FileId{FileKey: 2}, ModifiedTsNs: 124},
+				{Offset: 0, Size: 100, FileId: "xyz", Fid: &filer_pb.FileId{FileKey: 3}, ModifiedTsNs: 125},
 			},
 			Offset: 0,
 			Size:   100,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 100, FileId: "xyz", LogicOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 100, FileId: "xyz", ViewOffset: 0},
 			},
 		},
 		// case 7: edge cases
@@ -372,8 +379,8 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   200,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 100, FileId: "abc", LogicOffset: 0},
-				{Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 100},
+				{OffsetInChunk: 0, ViewSize: 100, FileId: "abc", ViewOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 100, FileId: "asdf", ViewOffset: 100},
 			},
 		},
 		// case 8: edge cases
@@ -386,9 +393,9 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   300,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 90, FileId: "abc", LogicOffset: 0},
-				{Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 90},
-				{Offset: 0, Size: 110, FileId: "fsad", LogicOffset: 190},
+				{OffsetInChunk: 0, ViewSize: 90, FileId: "abc", ViewOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 100, FileId: "asdf", ViewOffset: 90},
+				{OffsetInChunk: 0, ViewSize: 110, FileId: "fsad", ViewOffset: 190},
 			},
 		},
 		// case 9: edge cases
@@ -404,12 +411,12 @@ func TestChunksReading(t *testing.T) {
 			Offset: 0,
 			Size:   153578836,
 			Expected: []*ChunkView{
-				{Offset: 0, Size: 43175936, FileId: "2,111fc2cbfac1", LogicOffset: 0},
-				{Offset: 0, Size: 52981760 - 43175936, FileId: "2,112a36ea7f85", LogicOffset: 43175936},
-				{Offset: 0, Size: 72564736 - 52981760, FileId: "4,112d5f31c5e7", LogicOffset: 52981760},
-				{Offset: 0, Size: 133255168 - 72564736, FileId: "1,113245f0cdb6", LogicOffset: 72564736},
-				{Offset: 0, Size: 137269248 - 133255168, FileId: "3,1141a70733b5", LogicOffset: 133255168},
-				{Offset: 0, Size: 153578836 - 137269248, FileId: "1,114201d5bbdb", LogicOffset: 137269248},
+				{OffsetInChunk: 0, ViewSize: 43175936, FileId: "2,111fc2cbfac1", ViewOffset: 0},
+				{OffsetInChunk: 0, ViewSize: 52981760 - 43175936, FileId: "2,112a36ea7f85", ViewOffset: 43175936},
+				{OffsetInChunk: 0, ViewSize: 72564736 - 52981760, FileId: "4,112d5f31c5e7", ViewOffset: 52981760},
+				{OffsetInChunk: 0, ViewSize: 133255168 - 72564736, FileId: "1,113245f0cdb6", ViewOffset: 72564736},
+				{OffsetInChunk: 0, ViewSize: 137269248 - 133255168, FileId: "3,1141a70733b5", ViewOffset: 133255168},
+				{OffsetInChunk: 0, ViewSize: 153578836 - 137269248, FileId: "1,114201d5bbdb", ViewOffset: 137269248},
 			},
 		},
 	}
@@ -420,28 +427,31 @@ func TestChunksReading(t *testing.T) {
 		}
 		log.Printf("++++++++++ read test case %d ++++++++++++++++++++", i)
 		chunks := ViewFromChunks(nil, testcase.Chunks, testcase.Offset, testcase.Size)
-		for x, chunk := range chunks {
+		x := -1
+		for c := chunks.Front(); c != nil; c = c.Next {
+			x++
+			chunk := c.Value
 			log.Printf("read case %d, chunk %d, offset=%d, size=%d, fileId=%s",
-				i, x, chunk.Offset, chunk.Size, chunk.FileId)
-			if chunk.Offset != testcase.Expected[x].Offset {
+				i, x, chunk.OffsetInChunk, chunk.ViewSize, chunk.FileId)
+			if chunk.OffsetInChunk != testcase.Expected[x].OffsetInChunk {
 				t.Fatalf("failed on read case %d, chunk %s, Offset %d, expect %d",
-					i, chunk.FileId, chunk.Offset, testcase.Expected[x].Offset)
+					i, chunk.FileId, chunk.OffsetInChunk, testcase.Expected[x].OffsetInChunk)
 			}
-			if chunk.Size != testcase.Expected[x].Size {
-				t.Fatalf("failed on read case %d, chunk %s, Size %d, expect %d",
-					i, chunk.FileId, chunk.Size, testcase.Expected[x].Size)
+			if chunk.ViewSize != testcase.Expected[x].ViewSize {
+				t.Fatalf("failed on read case %d, chunk %s, ViewSize %d, expect %d",
+					i, chunk.FileId, chunk.ViewSize, testcase.Expected[x].ViewSize)
 			}
 			if chunk.FileId != testcase.Expected[x].FileId {
 				t.Fatalf("failed on read case %d, chunk %d, FileId %s, expect %s",
 					i, x, chunk.FileId, testcase.Expected[x].FileId)
 			}
-			if chunk.LogicOffset != testcase.Expected[x].LogicOffset {
-				t.Fatalf("failed on read case %d, chunk %d, LogicOffset %d, expect %d",
-					i, x, chunk.LogicOffset, testcase.Expected[x].LogicOffset)
+			if chunk.ViewOffset != testcase.Expected[x].ViewOffset {
+				t.Fatalf("failed on read case %d, chunk %d, ViewOffset %d, expect %d",
+					i, x, chunk.ViewOffset, testcase.Expected[x].ViewOffset)
 			}
 		}
-		if len(chunks) != len(testcase.Expected) {
-			t.Fatalf("failed to read test case %d, len %d expected %d", i, len(chunks), len(testcase.Expected))
+		if chunks.Len() != len(testcase.Expected) {
+			t.Fatalf("failed to read test case %d, len %d expected %d", i, chunks.Len(), len(testcase.Expected))
 		}
 	}

@@ -467,73 +477,79 @@ func BenchmarkCompactFileChunks(b *testing.B) {
 	}
 }

+func addVisibleInterval(visibles *IntervalList[*VisibleInterval], x *VisibleInterval) {
+	visibles.AppendInterval(&Interval[*VisibleInterval]{
+		StartOffset: x.start,
+		StopOffset:  x.stop,
+		TsNs:        x.modifiedTsNs,
+		Value:       x,
+	})
+}
+
 func TestViewFromVisibleIntervals(t *testing.T) {
-	visibles := []VisibleInterval{
-		{
-			start:  0,
-			stop:   25,
-			fileId: "fid1",
-		},
-		{
-			start:  4096,
-			stop:   8192,
-			fileId: "fid2",
-		},
-		{
-			start:  16384,
-			stop:   18551,
-			fileId: "fid3",
-		},
-	}
+	visibles := NewIntervalList[*VisibleInterval]()
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  0,
+		stop:   25,
+		fileId: "fid1",
+	})
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  4096,
+		stop:   8192,
+		fileId: "fid2",
+	})
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  16384,
+		stop:   18551,
+		fileId: "fid3",
+	})

 	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32)

-	if len(views) != len(visibles) {
-		assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
+	if views.Len() != visibles.Len() {
+		assert.Equal(t, visibles.Len(), views.Len(), "ViewFromVisibleIntervals error")
 	}

 }

 func TestViewFromVisibleIntervals2(t *testing.T) {
-	visibles := []VisibleInterval{
-		{
-			start:  344064,
-			stop:   348160,
-			fileId: "fid1",
-		},
-		{
-			start:  348160,
-			stop:   356352,
-			fileId: "fid2",
-		},
-	}
+	visibles := NewIntervalList[*VisibleInterval]()
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  344064,
+		stop:   348160,
+		fileId: "fid1",
+	})
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  348160,
+		stop:   356352,
+		fileId: "fid2",
+	})

 	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32)

-	if len(views) != len(visibles) {
-		assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
+	if views.Len() != visibles.Len() {
+		assert.Equal(t, visibles.Len(), views.Len(), "ViewFromVisibleIntervals error")
 	}

 }

 func TestViewFromVisibleIntervals3(t *testing.T) {
-	visibles := []VisibleInterval{
-		{
-			start:  1000,
-			stop:   2000,
-			fileId: "fid1",
-		},
-		{
-			start:  3000,
-			stop:   4000,
-			fileId: "fid2",
-		},
-	}
+	visibles := NewIntervalList[*VisibleInterval]()
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  1000,
+		stop:   2000,
+		fileId: "fid1",
+	})
+	addVisibleInterval(visibles, &VisibleInterval{
+		start:  3000,
+		stop:   4000,
+		fileId: "fid2",
+	})

 	views := ViewFromVisibleIntervals(visibles, 1700, 1500)

-	if len(views) != len(visibles) {
-		assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
+	if views.Len() != visibles.Len() {
+		assert.Equal(t, visibles.Len(), views.Len(), "ViewFromVisibleIntervals error")
 	}

 }