From 98c93ca46587795fa03a0c52bea5be31a886be87 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 20 Feb 2021 09:44:17 -0800 Subject: [PATCH 1/6] avoid file.Stat() avoid one Syscall, but did not help on performance though --- weed/storage/backend/disk_file.go | 33 ++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/weed/storage/backend/disk_file.go b/weed/storage/backend/disk_file.go index 2b04c8df2..161f4ca83 100644 --- a/weed/storage/backend/disk_file.go +++ b/weed/storage/backend/disk_file.go @@ -1,6 +1,7 @@ package backend import ( + "github.com/chrislusf/seaweedfs/weed/glog" "os" "time" ) @@ -12,12 +13,21 @@ var ( type DiskFile struct { File *os.File fullFilePath string + fileSize int64 + modTime time.Time } func NewDiskFile(f *os.File) *DiskFile { + stat, err := f.Stat() + if err != nil { + glog.Fatalf("stat file %s: %v", f.Name(), err) + } + return &DiskFile{ fullFilePath: f.Name(), File: f, + fileSize: stat.Size(), + modTime: stat.ModTime(), } } @@ -26,11 +36,24 @@ func (df *DiskFile) ReadAt(p []byte, off int64) (n int, err error) { } func (df *DiskFile) WriteAt(p []byte, off int64) (n int, err error) { - return df.File.WriteAt(p, off) + n, err = df.File.WriteAt(p, off) + if err == nil { + waterMark := off + int64(n) + if waterMark > df.fileSize { + df.fileSize = waterMark + df.modTime = time.Now() + } + } + return } func (df *DiskFile) Truncate(off int64) error { - return df.File.Truncate(off) + err := df.File.Truncate(off) + if err == nil { + df.fileSize = off + df.modTime = time.Now() + } + return err } func (df *DiskFile) Close() error { @@ -38,11 +61,7 @@ func (df *DiskFile) Close() error { } func (df *DiskFile) GetStat() (datSize int64, modTime time.Time, err error) { - stat, e := df.File.Stat() - if e == nil { - return stat.Size(), stat.ModTime(), nil - } - return 0, time.Time{}, err + return df.fileSize, df.modTime, nil } func (df *DiskFile) Name() string { From 1ad3200094bc08affd0bb640b24b83f15fedcebd Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 20 Feb 2021 12:39:25 -0800 Subject: [PATCH 2/6] skip seek() when index file writes --- weed/storage/needle/needle_read_write.go | 10 +++++++++- weed/storage/needle_map.go | 9 ++++----- weed/storage/needle_map_leveldb.go | 5 +++++ weed/storage/needle_map_memory.go | 5 +++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/weed/storage/needle/needle_read_write.go b/weed/storage/needle/needle_read_write.go index e758a6fee..0f72bc0bb 100644 --- a/weed/storage/needle/needle_read_write.go +++ b/weed/storage/needle/needle_read_write.go @@ -161,7 +161,15 @@ func ReadNeedleBlob(r backend.BackendStorageFile, offset int64, size Size, versi dataSize := GetActualSize(size, version) dataSlice = make([]byte, int(dataSize)) - _, err = r.ReadAt(dataSlice, offset) + var n int + n, err = r.ReadAt(dataSlice, offset) + if err != nil && int64(n) == dataSize { + err = nil + } + if err != nil { + fileSize, _, _ := r.GetStat() + println("n",n, "dataSize", dataSize, "offset", offset, "fileSize", fileSize) + } return dataSlice, err } diff --git a/weed/storage/needle_map.go b/weed/storage/needle_map.go index 5b41286ea..d35391f66 100644 --- a/weed/storage/needle_map.go +++ b/weed/storage/needle_map.go @@ -1,7 +1,6 @@ package storage import ( - "fmt" "io" "os" "sync" @@ -41,6 +40,7 @@ type baseNeedleMapper struct { indexFile *os.File indexFileAccessLock sync.Mutex + indexFileOffset int64 } func (nm *baseNeedleMapper) IndexFileSize() uint64 { @@ -56,11 +56,10 @@ func (nm *baseNeedleMapper) appendToIndexFile(key NeedleId, offset Offset, size nm.indexFileAccessLock.Lock() defer nm.indexFileAccessLock.Unlock() - if _, err := nm.indexFile.Seek(0, 2); err != nil { - return fmt.Errorf("cannot seek end of indexfile %s: %v", - nm.indexFile.Name(), err) + written, err := nm.indexFile.WriteAt(bytes, nm.indexFileOffset) + if err == nil { + nm.indexFileOffset += int64(written) } - _, err := nm.indexFile.Write(bytes) return err } diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index 415cd14dd..9716e9729 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -31,6 +31,11 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Option generateLevelDbFile(dbFileName, indexFile) glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) } + if stat, err := indexFile.Stat(); err != nil { + glog.Fatalf("stat file %s: %v", indexFile.Name(), err) + } else { + m.indexFileOffset = stat.Size() + } glog.V(1).Infof("Opening %s...", dbFileName) if m.db, err = leveldb.OpenFile(dbFileName, opts); err != nil { diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go index d0891dc98..1b58708c6 100644 --- a/weed/storage/needle_map_memory.go +++ b/weed/storage/needle_map_memory.go @@ -19,6 +19,11 @@ func NewCompactNeedleMap(file *os.File) *NeedleMap { m: needle_map.NewCompactMap(), } nm.indexFile = file + stat, err := file.Stat() + if err != nil { + glog.Fatalf("stat file %s: %v", file.Name(), err) + } + nm.indexFileOffset = stat.Size() return nm } From 7ef4c24f186f97d9a531849ed626ac1732437b77 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 20 Feb 2021 12:39:33 -0800 Subject: [PATCH 3/6] Revert "avoid file.Stat()" This reverts commit 98c93ca46587795fa03a0c52bea5be31a886be87. --- weed/storage/backend/disk_file.go | 33 +++++++------------------------ 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/weed/storage/backend/disk_file.go b/weed/storage/backend/disk_file.go index 161f4ca83..2b04c8df2 100644 --- a/weed/storage/backend/disk_file.go +++ b/weed/storage/backend/disk_file.go @@ -1,7 +1,6 @@ package backend import ( - "github.com/chrislusf/seaweedfs/weed/glog" "os" "time" ) @@ -13,21 +12,12 @@ var ( type DiskFile struct { File *os.File fullFilePath string - fileSize int64 - modTime time.Time } func NewDiskFile(f *os.File) *DiskFile { - stat, err := f.Stat() - if err != nil { - glog.Fatalf("stat file %s: %v", f.Name(), err) - } - return &DiskFile{ fullFilePath: f.Name(), File: f, - fileSize: stat.Size(), - modTime: stat.ModTime(), } } @@ -36,24 +26,11 @@ func (df *DiskFile) ReadAt(p []byte, off int64) (n int, err error) { } func (df *DiskFile) WriteAt(p []byte, off int64) (n int, err error) { - n, err = df.File.WriteAt(p, off) - if err == nil { - waterMark := off + int64(n) - if waterMark > df.fileSize { - df.fileSize = waterMark - df.modTime = time.Now() - } - } - return + return df.File.WriteAt(p, off) } func (df *DiskFile) Truncate(off int64) error { - err := df.File.Truncate(off) - if err == nil { - df.fileSize = off - df.modTime = time.Now() - } - return err + return df.File.Truncate(off) } func (df *DiskFile) Close() error { @@ -61,7 +38,11 @@ func (df *DiskFile) Close() error { } func (df *DiskFile) GetStat() (datSize int64, modTime time.Time, err error) { - return df.fileSize, df.modTime, nil + stat, e := df.File.Stat() + if e == nil { + return stat.Size(), stat.ModTime(), nil + } + return 0, time.Time{}, err } func (df *DiskFile) Name() string { From a2383b3b12574eb45980b012b0515aab6e324cb8 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 20 Feb 2021 12:42:09 -0800 Subject: [PATCH 4/6] Revert "Revert "avoid file.Stat()"" This reverts commit 7ef4c24f186f97d9a531849ed626ac1732437b77. --- weed/storage/backend/disk_file.go | 33 ++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/weed/storage/backend/disk_file.go b/weed/storage/backend/disk_file.go index 2b04c8df2..161f4ca83 100644 --- a/weed/storage/backend/disk_file.go +++ b/weed/storage/backend/disk_file.go @@ -1,6 +1,7 @@ package backend import ( + "github.com/chrislusf/seaweedfs/weed/glog" "os" "time" ) @@ -12,12 +13,21 @@ var ( type DiskFile struct { File *os.File fullFilePath string + fileSize int64 + modTime time.Time } func NewDiskFile(f *os.File) *DiskFile { + stat, err := f.Stat() + if err != nil { + glog.Fatalf("stat file %s: %v", f.Name(), err) + } + return &DiskFile{ fullFilePath: f.Name(), File: f, + fileSize: stat.Size(), + modTime: stat.ModTime(), } } @@ -26,11 +36,24 @@ func (df *DiskFile) ReadAt(p []byte, off int64) (n int, err error) { } func (df *DiskFile) WriteAt(p []byte, off int64) (n int, err error) { - return df.File.WriteAt(p, off) + n, err = df.File.WriteAt(p, off) + if err == nil { + waterMark := off + int64(n) + if waterMark > df.fileSize { + df.fileSize = waterMark + df.modTime = time.Now() + } + } + return } func (df *DiskFile) Truncate(off int64) error { - return df.File.Truncate(off) + err := df.File.Truncate(off) + if err == nil { + df.fileSize = off + df.modTime = time.Now() + } + return err } func (df *DiskFile) Close() error { @@ -38,11 +61,7 @@ func (df *DiskFile) Close() error { } func (df *DiskFile) GetStat() (datSize int64, modTime time.Time, err error) { - stat, e := df.File.Stat() - if e == nil { - return stat.Size(), stat.ModTime(), nil - } - return 0, time.Time{}, err + return df.fileSize, df.modTime, nil } func (df *DiskFile) Name() string { From 7635f6b9fa5c4d9def4430577247857dfd9ad2f4 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 20 Feb 2021 20:06:06 -0800 Subject: [PATCH 5/6] disk file avoid file.Stat() --- unmaintained/change_superblock/change_superblock.go | 2 +- weed/storage/backend/disk_file.go | 11 ++++++++++- weed/storage/volume_vacuum.go | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/unmaintained/change_superblock/change_superblock.go b/unmaintained/change_superblock/change_superblock.go index afe651c4e..56342a0cb 100644 --- a/unmaintained/change_superblock/change_superblock.go +++ b/unmaintained/change_superblock/change_superblock.go @@ -92,7 +92,7 @@ func main() { header := superBlock.Bytes() - if n, e := datFile.WriteAt(header, 0); n == 0 || e != nil { + if n, e := datBackend.WriteAt(header, 0); n == 0 || e != nil { glog.Fatalf("cannot write super block: %v", e) } diff --git a/weed/storage/backend/disk_file.go b/weed/storage/backend/disk_file.go index 161f4ca83..498963c31 100644 --- a/weed/storage/backend/disk_file.go +++ b/weed/storage/backend/disk_file.go @@ -2,6 +2,7 @@ package backend import ( "github.com/chrislusf/seaweedfs/weed/glog" + . "github.com/chrislusf/seaweedfs/weed/storage/types" "os" "time" ) @@ -22,11 +23,15 @@ func NewDiskFile(f *os.File) *DiskFile { if err != nil { glog.Fatalf("stat file %s: %v", f.Name(), err) } + offset := stat.Size() + if offset%NeedlePaddingSize != 0 { + offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize) + } return &DiskFile{ fullFilePath: f.Name(), File: f, - fileSize: stat.Size(), + fileSize: offset, modTime: stat.ModTime(), } } @@ -47,6 +52,10 @@ func (df *DiskFile) WriteAt(p []byte, off int64) (n int, err error) { return } +func (df *DiskFile) Append(p []byte) (n int, err error) { + return df.WriteAt(p, df.fileSize) +} + func (df *DiskFile) Truncate(off int64) error { err := df.File.Truncate(off) if err == nil { diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go index c17c9c937..0ee1e61c6 100644 --- a/weed/storage/volume_vacuum.go +++ b/weed/storage/volume_vacuum.go @@ -286,7 +286,7 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI if err != nil { return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, increIdxEntry.offset.ToActualOffset(), increIdxEntry.size, err) } - dst.Write(needleBytes) + dstDatBackend.Append(needleBytes) util.Uint32toBytes(idxEntryBytes[8:12], uint32(offset/NeedlePaddingSize)) } else { //deleted needle //fakeDelNeedle 's default Data field is nil From 03c643aa6b31fdbcfaf00d69d003f2dc446f2557 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 20 Feb 2021 21:19:21 -0800 Subject: [PATCH 6/6] fix test fileSize need to be divided by 8 --- weed/storage/needle/needle_read_write_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/storage/needle/needle_read_write_test.go b/weed/storage/needle/needle_read_write_test.go index 47582dd26..afcea5a05 100644 --- a/weed/storage/needle/needle_read_write_test.go +++ b/weed/storage/needle/needle_read_write_test.go @@ -48,7 +48,7 @@ func TestAppend(t *testing.T) { int64 : -9223372036854775808 to 9223372036854775807 */ - fileSize := int64(4294967295) + 10000 + fileSize := int64(4294967296) + 10000 tempFile.Truncate(fileSize) defer func() { tempFile.Close()