Add stream writer.
This should improve streaming-write performance, which is common in many cases, e.g., copying large files.
This is in addition to the improved random read/write operations: 3e69d19380...19084d8791
This commit is contained in:
@@ -4,14 +4,18 @@ import "math"
|
||||
|
||||
// ChunkWrittenInterval marks one written interval within one page chunk.
// Intervals form a doubly linked list ordered by offset.
type ChunkWrittenInterval struct {
	StartOffset int64                 // inclusive start of the written range within the chunk
	stopOffset  int64                 // exclusive end of the written range
	prev        *ChunkWrittenInterval // previous interval in the list (or head sentinel)
	next        *ChunkWrittenInterval // next interval in the list (or tail sentinel)
}
|
||||
|
||||
// Size returns the number of bytes covered by this interval.
func (interval *ChunkWrittenInterval) Size() int64 {
	return interval.stopOffset - interval.StartOffset
}
|
||||
|
||||
// isComplete reports whether this single interval spans an entire chunk of
// chunkSize bytes.
func (interval *ChunkWrittenInterval) isComplete(chunkSize int64) bool {
	return interval.stopOffset-interval.StartOffset == chunkSize
}
|
||||
|
||||
// ChunkWrittenIntervalList mark written intervals within one page chunk
|
||||
@@ -23,11 +27,11 @@ type ChunkWrittenIntervalList struct {
|
||||
func newChunkWrittenIntervalList() *ChunkWrittenIntervalList {
|
||||
list := &ChunkWrittenIntervalList{
|
||||
head: &ChunkWrittenInterval{
|
||||
startOffset: -1,
|
||||
StartOffset: -1,
|
||||
stopOffset: -1,
|
||||
},
|
||||
tail: &ChunkWrittenInterval{
|
||||
startOffset: math.MaxInt64,
|
||||
StartOffset: math.MaxInt64,
|
||||
stopOffset: math.MaxInt64,
|
||||
},
|
||||
}
|
||||
@@ -38,35 +42,40 @@ func newChunkWrittenIntervalList() *ChunkWrittenIntervalList {
|
||||
|
||||
// MarkWritten records [startOffset, stopOffset) as written within this chunk.
// addInterval keeps the list sorted and merges overlapping intervals.
func (list *ChunkWrittenIntervalList) MarkWritten(startOffset, stopOffset int64) {
	interval := &ChunkWrittenInterval{
		StartOffset: startOffset,
		stopOffset:  stopOffset,
	}
	list.addInterval(interval)
}
|
||||
|
||||
// IsComplete reports whether the whole chunk has been written: the list must
// hold exactly one interval and that interval must cover chunkSize bytes.
func (list *ChunkWrittenIntervalList) IsComplete(chunkSize int64) bool {
	return list.size() == 1 && list.head.next.isComplete(chunkSize)
}
|
||||
|
||||
func (list *ChunkWrittenIntervalList) addInterval(interval *ChunkWrittenInterval) {
|
||||
|
||||
p := list.head
|
||||
for ; p.next != nil && p.next.startOffset <= interval.startOffset; p = p.next {
|
||||
for ; p.next != nil && p.next.StartOffset <= interval.StartOffset; p = p.next {
|
||||
}
|
||||
q := list.tail
|
||||
for ; q.prev != nil && q.prev.stopOffset >= interval.stopOffset; q = q.prev {
|
||||
}
|
||||
|
||||
if interval.startOffset <= p.stopOffset && q.startOffset <= interval.stopOffset {
|
||||
if interval.StartOffset <= p.stopOffset && q.StartOffset <= interval.stopOffset {
|
||||
// merge p and q together
|
||||
p.stopOffset = q.stopOffset
|
||||
unlinkNodesBetween(p, q.next)
|
||||
return
|
||||
}
|
||||
if interval.startOffset <= p.stopOffset {
|
||||
if interval.StartOffset <= p.stopOffset {
|
||||
// merge new interval into p
|
||||
p.stopOffset = interval.stopOffset
|
||||
unlinkNodesBetween(p, q)
|
||||
return
|
||||
}
|
||||
if q.startOffset <= interval.stopOffset {
|
||||
if q.StartOffset <= interval.stopOffset {
|
||||
// merge new interval into q
|
||||
q.startOffset = interval.startOffset
|
||||
q.StartOffset = interval.StartOffset
|
||||
unlinkNodesBetween(p, q)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ func (cw *ChunkedFileWriter) ReadDataAt(p []byte, off int64) (maxStop int64) {
|
||||
actualChunkIndex, chunkUsage := cw.toActualReadOffset(off)
|
||||
if chunkUsage != nil {
|
||||
for t := chunkUsage.head.next; t != chunkUsage.tail; t = t.next {
|
||||
logicStart := max(off, logicChunkIndex*cw.ChunkSize+t.startOffset)
|
||||
logicStart := max(off, logicChunkIndex*cw.ChunkSize+t.StartOffset)
|
||||
logicStop := min(off+int64(len(p)), logicChunkIndex*cw.ChunkSize+t.stopOffset)
|
||||
if logicStart < logicStop {
|
||||
actualStart := logicStart - logicChunkIndex*cw.ChunkSize + int64(actualChunkIndex)*cw.ChunkSize
|
||||
@@ -110,11 +110,16 @@ func (cw *ChunkedFileWriter) ProcessEachInterval(process func(file *os.File, log
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reset releases used resources: it closes and removes the backing temp file
// (best effort — Close/Remove errors are intentionally ignored) and clears
// all chunk bookkeeping so the writer can be reused.
func (cw *ChunkedFileWriter) Reset() {
	if cw.file != nil {
		cw.file.Close()
		os.Remove(cw.file.Name())
		cw.file = nil
	}
	cw.logicToActualChunkIndex = make(map[LogicChunkIndex]ActualChunkIndex)
	// Keep the slice's capacity for reuse; drop its contents.
	cw.chunkUsages = cw.chunkUsages[:0]
}
|
||||
|
||||
type FileIntervalReader struct {
|
||||
@@ -134,7 +139,7 @@ func NewFileIntervalReader(cw *ChunkedFileWriter, logicChunkIndex LogicChunkInde
|
||||
}
|
||||
return &FileIntervalReader{
|
||||
f: cw.file,
|
||||
startOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.startOffset,
|
||||
startOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.StartOffset,
|
||||
stopOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.stopOffset,
|
||||
position: 0,
|
||||
}
|
||||
|
||||
@@ -35,9 +35,9 @@ func writeToFile(cw *ChunkedFileWriter, startOffset int64, stopOffset int64) {
|
||||
|
||||
func TestWriteChunkedFile(t *testing.T) {
|
||||
x := NewChunkedFileWriter(os.TempDir(), 20)
|
||||
defer x.Destroy()
|
||||
defer x.Reset()
|
||||
y := NewChunkedFileWriter(os.TempDir(), 12)
|
||||
defer y.Destroy()
|
||||
defer y.Reset()
|
||||
|
||||
batchSize := 4
|
||||
buf := make([]byte, batchSize)
|
||||
|
||||
119
weed/filesys/page_writer/chunked_stream_writer.go
Normal file
119
weed/filesys/page_writer/chunked_stream_writer.go
Normal file
@@ -0,0 +1,119 @@
|
||||
package page_writer
|
||||
|
||||
import (
|
||||
"github.com/chrislusf/seaweedfs/weed/util"
|
||||
"github.com/chrislusf/seaweedfs/weed/util/mem"
|
||||
"io"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// SaveToStorageFunc persists one contiguous written interval of size bytes at
// logical file offset. The implementation must invoke cleanupFn after it has
// finished consuming reader, so the writer can release the interval's backing
// memory (see saveOneChunk).
type SaveToStorageFunc func(reader io.Reader, offset int64, size int64, cleanupFn func())
|
||||
// ChunkedStreamWriter assumes the write requests will come in within chunks and in streaming mode
|
||||
type ChunkedStreamWriter struct {
|
||||
activeChunks map[LogicChunkIndex]*MemChunk
|
||||
activeChunksLock sync.Mutex
|
||||
ChunkSize int64
|
||||
saveToStorageFn SaveToStorageFunc
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
// MemChunk is one in-memory page chunk together with the record of which byte
// ranges inside it have been written so far.
type MemChunk struct {
	buf   []byte                    // backing buffer, ChunkSize bytes, from mem.Allocate
	usage *ChunkWrittenIntervalList // written intervals within buf
}
|
||||
|
||||
var _ = io.WriterAt(&ChunkedStreamWriter{})
|
||||
|
||||
func NewChunkedStreamWriter(chunkSize int64) *ChunkedStreamWriter {
|
||||
return &ChunkedStreamWriter{
|
||||
ChunkSize: chunkSize,
|
||||
activeChunks: make(map[LogicChunkIndex]*MemChunk),
|
||||
}
|
||||
}
|
||||
|
||||
// SetSaveToStorageFunction registers the callback used to persist completed
// chunks and flushed intervals.
// NOTE(review): this assignment is not guarded by the writer's mutex —
// presumably it is called once before any concurrent WriteAt/FlushAll; verify
// against callers.
func (cw *ChunkedStreamWriter) SetSaveToStorageFunction(saveToStorageFn SaveToStorageFunc) {
	cw.saveToStorageFn = saveToStorageFn
}
|
||||
|
||||
func (cw *ChunkedStreamWriter) WriteAt(p []byte, off int64) (n int, err error) {
|
||||
cw.Lock()
|
||||
defer cw.Unlock()
|
||||
|
||||
logicChunkIndex := LogicChunkIndex(off / cw.ChunkSize)
|
||||
offsetRemainder := off % cw.ChunkSize
|
||||
|
||||
memChunk, found := cw.activeChunks[logicChunkIndex]
|
||||
if !found {
|
||||
memChunk = &MemChunk{
|
||||
buf: mem.Allocate(int(cw.ChunkSize)),
|
||||
usage: newChunkWrittenIntervalList(),
|
||||
}
|
||||
cw.activeChunks[logicChunkIndex] = memChunk
|
||||
}
|
||||
n = copy(memChunk.buf[offsetRemainder:], p)
|
||||
memChunk.usage.MarkWritten(offsetRemainder, offsetRemainder+int64(n))
|
||||
if memChunk.usage.IsComplete(cw.ChunkSize) {
|
||||
if cw.saveToStorageFn != nil {
|
||||
cw.saveOneChunk(memChunk, logicChunkIndex)
|
||||
delete(cw.activeChunks, logicChunkIndex)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// ReadDataAt copies any written bytes overlapping [off, off+len(p)) into p
// and returns the highest logical offset (exclusive) that was filled;
// maxStop == 0 means nothing in range was found. Only the single chunk
// containing off is consulted, mirroring the chunk granularity of WriteAt.
func (cw *ChunkedStreamWriter) ReadDataAt(p []byte, off int64) (maxStop int64) {
	cw.Lock()
	defer cw.Unlock()

	logicChunkIndex := LogicChunkIndex(off / cw.ChunkSize)
	memChunkBaseOffset := int64(logicChunkIndex) * cw.ChunkSize
	memChunk, found := cw.activeChunks[logicChunkIndex]
	if !found {
		return
	}

	// Walk the real intervals between the head and tail sentinels, clipping
	// each one to the requested logical range before copying.
	for t := memChunk.usage.head.next; t != memChunk.usage.tail; t = t.next {
		logicStart := max(off, int64(logicChunkIndex)*cw.ChunkSize+t.StartOffset)
		logicStop := min(off+int64(len(p)), memChunkBaseOffset+t.stopOffset)
		if logicStart < logicStop {
			copy(p[logicStart-off:logicStop-off], memChunk.buf[logicStart-memChunkBaseOffset:logicStop-memChunkBaseOffset])
			maxStop = max(maxStop, logicStop)
		}
	}
	return
}
|
||||
|
||||
func (cw *ChunkedStreamWriter) FlushAll() {
|
||||
cw.Lock()
|
||||
defer cw.Unlock()
|
||||
for logicChunkIndex, memChunk := range cw.activeChunks {
|
||||
if cw.saveToStorageFn != nil {
|
||||
cw.saveOneChunk(memChunk, logicChunkIndex)
|
||||
delete(cw.activeChunks, logicChunkIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (cw *ChunkedStreamWriter) saveOneChunk(memChunk *MemChunk, logicChunkIndex LogicChunkIndex) {
|
||||
var referenceCounter = int32(memChunk.usage.size())
|
||||
for t := memChunk.usage.head.next; t != memChunk.usage.tail; t = t.next {
|
||||
reader := util.NewBytesReader(memChunk.buf[t.StartOffset:t.stopOffset])
|
||||
cw.saveToStorageFn(reader, int64(logicChunkIndex)*cw.ChunkSize+t.StartOffset, t.Size(), func() {
|
||||
atomic.AddInt32(&referenceCounter, -1)
|
||||
if atomic.LoadInt32(&referenceCounter) == 0 {
|
||||
mem.Free(memChunk.buf)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Reset releases used resources
|
||||
func (cw *ChunkedStreamWriter) Reset() {
|
||||
for t, memChunk := range cw.activeChunks {
|
||||
mem.Free(memChunk.buf)
|
||||
delete(cw.activeChunks, t)
|
||||
}
|
||||
}
|
||||
33
weed/filesys/page_writer/chunked_stream_writer_test.go
Normal file
33
weed/filesys/page_writer/chunked_stream_writer_test.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package page_writer
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestWriteChunkedStream(t *testing.T) {
|
||||
x := NewChunkedStreamWriter(20)
|
||||
defer x.Reset()
|
||||
y := NewChunkedFileWriter(os.TempDir(), 12)
|
||||
defer y.Reset()
|
||||
|
||||
batchSize := 4
|
||||
buf := make([]byte, batchSize)
|
||||
for i := 0; i < 256; i++ {
|
||||
for x := 0; x < batchSize; x++ {
|
||||
buf[x] = byte(i)
|
||||
}
|
||||
x.WriteAt(buf, int64(i*batchSize))
|
||||
y.WriteAt(buf, int64((255-i)*batchSize))
|
||||
}
|
||||
|
||||
a := make([]byte, 1)
|
||||
b := make([]byte, 1)
|
||||
for i := 0; i < 256*batchSize; i++ {
|
||||
x.ReadDataAt(a, int64(i))
|
||||
y.ReadDataAt(b, int64(256*batchSize-1-i))
|
||||
assert.Equal(t, a[0], b[0], "same read")
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user