Fix: prevent panic when swap file creation fails (#7957)

* Fix: prevent panic when swap file creation fails

* weed mount: fix race condition in swap file initialization

Ensure thread-safe access to sf.file and other state in NewSwapFileChunk
and FreeResource by using sf.chunkTrackingLock consistently. Also
set sf.file to nil after closing to prevent reuse.

* weed mount: improve swap directory creation logic

- Check error for os.MkdirAll and log it if it fails.
- Use 0700 permissions for the swap directory for better security.
- Improve error logging context.

* weed mount: add unit tests for swap file creation

Add tests to verify:
- Concurrent initialization of the swap file.
- Correct directory permissions (0700).
- Automatic directory recreation if deleted.

* weed mount: fix thread-safety in swap file unit tests

Use atomic.Uint32 to track failures within goroutines in
TestSwapFile_NewSwapFileChunk_Concurrent to avoid unsafe calls to
t.Errorf from multiple goroutines.

* weed mount: simplify swap file creation logic

Refactor the directory check and retry logic for better readability and
to avoid re-using the main error variable for directory creation errors.
Remove redundant error logging.

* weed mount: improve error checking in swap file tests

Explicitly check if NewSwapFileChunk returns nil to provide more
informative failures.

* weed mount: update DirtyPages interface to return error

Propagate errors from SaveDataAt when swap file creation fails. This
prevents potential panics in the write path.

* weed mount: handle AddPage errors in write paths

Update ChunkedDirtyPages and PageWriter to propagate errors and update
WFS.Write and WFS.CopyFileRange to return fuse.EIO on failure.

* weed mount: update swap directory creation error message

Change "recreate" to "create/recreate" to better reflect that this path
is also taken during the initial creation of the swap directory.

---------

Co-authored-by: lixiang58 <lixiang58@lenovo.com>
Co-authored-by: Chris Lu <chris.lu@gmail.com>
This commit is contained in:
LeeXN
2026-01-04 17:12:37 +08:00
committed by GitHub
parent 0b5a65e00b
commit c909724bf1
9 changed files with 156 additions and 23 deletions

View File

@@ -38,13 +38,13 @@ func newMemoryChunkPages(fh *FileHandle, chunkSize int64) *ChunkedDirtyPages {
return dirtyPages return dirtyPages
} }
func (pages *ChunkedDirtyPages) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) { func (pages *ChunkedDirtyPages) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) error {
pages.hasWrites = true pages.hasWrites = true
glog.V(4).Infof("%v memory AddPage [%d, %d)", pages.fh.fh, offset, offset+int64(len(data))) glog.V(4).Infof("%v memory AddPage [%d, %d)", pages.fh.fh, offset, offset+int64(len(data)))
pages.uploadPipeline.SaveDataAt(data, offset, isSequential, tsNs) _, err := pages.uploadPipeline.SaveDataAt(data, offset, isSequential, tsNs)
return return err
} }
func (pages *ChunkedDirtyPages) FlushData() error { func (pages *ChunkedDirtyPages) FlushData() error {

View File

@@ -29,21 +29,24 @@ func newPageWriter(fh *FileHandle, chunkSize int64) *PageWriter {
return pw return pw
} }
func (pw *PageWriter) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) { func (pw *PageWriter) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) error {
glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh.fh, offset, offset+int64(len(data))) glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh.fh, offset, offset+int64(len(data)))
chunkIndex := offset / pw.chunkSize chunkIndex := offset / pw.chunkSize
for i := chunkIndex; len(data) > 0; i++ { for i := chunkIndex; len(data) > 0; i++ {
writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset) writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)
pw.addToOneChunk(i, offset, data[:writeSize], isSequential, tsNs) if err := pw.addToOneChunk(i, offset, data[:writeSize], isSequential, tsNs); err != nil {
return err
}
offset += writeSize offset += writeSize
data = data[writeSize:] data = data[writeSize:]
} }
return nil
} }
func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte, isSequential bool, tsNs int64) { func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte, isSequential bool, tsNs int64) error {
pw.randomWriter.AddPage(offset, data, isSequential, tsNs) return pw.randomWriter.AddPage(offset, data, isSequential, tsNs)
} }
func (pw *PageWriter) FlushData() error { func (pw *PageWriter) FlushData() error {

View File

@@ -1,7 +1,7 @@
package page_writer package page_writer
type DirtyPages interface { type DirtyPages interface {
AddPage(offset int64, data []byte, isSequential bool, tsNs int64) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) error
FlushData() error FlushData() error
ReadDirtyDataAt(data []byte, startOffset int64, tsNs int64) (maxStop int64) ReadDirtyDataAt(data []byte, startOffset int64, tsNs int64) (maxStop int64)
Destroy() Destroy()

View File

@@ -1,12 +1,13 @@
package page_writer package page_writer
import ( import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/mem"
"io" "io"
"os" "os"
"sync" "sync"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/mem"
) )
var ( var (
@@ -42,23 +43,34 @@ func NewSwapFile(dir string, chunkSize int64) *SwapFile {
} }
} }
func (sf *SwapFile) FreeResource() { func (sf *SwapFile) FreeResource() {
sf.chunkTrackingLock.Lock()
defer sf.chunkTrackingLock.Unlock()
if sf.file != nil { if sf.file != nil {
sf.file.Close() sf.file.Close()
os.Remove(sf.file.Name()) os.Remove(sf.file.Name())
sf.file = nil
} }
} }
func (sf *SwapFile) NewSwapFileChunk(logicChunkIndex LogicChunkIndex) (tc *SwapFileChunk) { func (sf *SwapFile) NewSwapFileChunk(logicChunkIndex LogicChunkIndex) (tc *SwapFileChunk) {
sf.chunkTrackingLock.Lock()
defer sf.chunkTrackingLock.Unlock()
if sf.file == nil { if sf.file == nil {
var err error var err error
sf.file, err = os.CreateTemp(sf.dir, "") sf.file, err = os.CreateTemp(sf.dir, "")
if os.IsNotExist(err) {
if mkdirErr := os.MkdirAll(sf.dir, 0700); mkdirErr != nil {
glog.Errorf("create/recreate swap directory %s: %v", sf.dir, mkdirErr)
return nil
}
sf.file, err = os.CreateTemp(sf.dir, "")
}
if err != nil { if err != nil {
glog.Errorf("create swap file: %v", err) glog.Errorf("create swap file in %s: %v", sf.dir, err)
return nil return nil
} }
} }
sf.chunkTrackingLock.Lock()
defer sf.chunkTrackingLock.Unlock()
sf.activeChunkCount++ sf.activeChunkCount++

View File

@@ -0,0 +1,105 @@
package page_writer
import (
"os"
"path/filepath"
"sync"
"sync/atomic"
"testing"
)
// TestSwapFile_NewSwapFileChunk_Concurrent verifies that concurrent callers of
// NewSwapFileChunk all succeed and that the lazily created backing file ends
// up initialized.
func TestSwapFile_NewSwapFileChunk_Concurrent(t *testing.T) {
	tempDir, err := os.MkdirTemp("", "seaweedfs_swap_test")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(tempDir)
	sf := NewSwapFile(filepath.Join(tempDir, "swap"), 1024)
	defer sf.FreeResource()
	const workers = 10
	var (
		wg       sync.WaitGroup
		nilCount atomic.Uint32
	)
	wg.Add(workers)
	for idx := 0; idx < workers; idx++ {
		go func(n int) {
			defer wg.Done()
			// A nil chunk means the swap file could not be created; count it
			// atomically instead of calling t.Errorf from the goroutine.
			if sf.NewSwapFileChunk(LogicChunkIndex(n)) == nil {
				nilCount.Add(1)
			}
		}(idx)
	}
	wg.Wait()
	if n := nilCount.Load(); n > 0 {
		t.Errorf("failed to create %d chunks", n)
	}
	// wg.Wait establishes happens-before with every goroutine, so reading
	// sf.file without the lock is safe at this point.
	if sf.file == nil {
		t.Error("sf.file should not be nil")
	}
}
// TestSwapFile_MkdirAll_Permissions verifies that NewSwapFileChunk creates a
// missing swap directory and that the directory gets 0700 permissions.
func TestSwapFile_MkdirAll_Permissions(t *testing.T) {
	tempDir, err := os.MkdirTemp("", "seaweedfs_swap_perm_test")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(tempDir)
	swapDir := filepath.Join(tempDir, "swap_subdir")
	sf := NewSwapFile(swapDir, 1024)
	defer sf.FreeResource()
	// The directory does not exist yet, so this call must go through the
	// os.MkdirAll path.
	if sf.NewSwapFileChunk(0) == nil {
		t.Fatal("NewSwapFileChunk failed to create a chunk")
	}
	info, statErr := os.Stat(swapDir)
	if statErr != nil {
		t.Fatalf("failed to stat swap dir: %v", statErr)
	}
	if !info.IsDir() {
		t.Errorf("expected %s to be a directory", swapDir)
	}
	// The swap directory is created with owner-only access.
	if perm := info.Mode().Perm(); perm != 0700 {
		t.Errorf("expected permissions 0700, got %o", perm)
	}
}
// TestSwapFile_RecreateDir verifies that the swap directory is transparently
// recreated when it disappears between chunk allocations.
func TestSwapFile_RecreateDir(t *testing.T) {
	tempDir, err := os.MkdirTemp("", "seaweedfs_swap_recreate_test")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(tempDir)
	swapDir := filepath.Join(tempDir, "swap_recreate")
	sf := NewSwapFile(swapDir, 1024)
	// First allocation creates both the directory and the backing file.
	if sf.NewSwapFileChunk(0) == nil {
		t.Fatal("NewSwapFileChunk failed to create the first chunk")
	}
	sf.FreeResource()
	// Remove the directory out from under the swap file.
	os.RemoveAll(swapDir)
	// The next allocation must recreate the directory on demand.
	if sf.NewSwapFileChunk(1) == nil {
		t.Error("failed to create chunk after directory deletion")
	}
	if _, statErr := os.Stat(swapDir); os.IsNotExist(statErr) {
		t.Error("swap directory was not recreated")
	}
	sf.FreeResource()
}

View File

@@ -2,10 +2,11 @@ package page_writer
import ( import (
"fmt" "fmt"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
"sync" "sync"
"sync/atomic" "sync/atomic"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
) )
type LogicChunkIndex int type LogicChunkIndex int
@@ -55,7 +56,7 @@ func NewUploadPipeline(writers *util.LimitedConcurrentExecutor, chunkSize int64,
return t return t
} }
func (up *UploadPipeline) SaveDataAt(p []byte, off int64, isSequential bool, tsNs int64) (n int) { func (up *UploadPipeline) SaveDataAt(p []byte, off int64, isSequential bool, tsNs int64) (n int, err error) {
up.chunksLock.Lock() up.chunksLock.Lock()
defer up.chunksLock.Unlock() defer up.chunksLock.Unlock()
@@ -103,6 +104,9 @@ func (up *UploadPipeline) SaveDataAt(p []byte, off int64, isSequential bool, tsN
} else { } else {
pageChunk = up.swapFile.NewSwapFileChunk(logicChunkIndex) pageChunk = up.swapFile.NewSwapFileChunk(logicChunkIndex)
// fmt.Printf(" create file chunk %d\n", logicChunkIndex) // fmt.Printf(" create file chunk %d\n", logicChunkIndex)
if pageChunk == nil {
return 0, fmt.Errorf("failed to create swap file chunk")
}
} }
up.writableChunks[logicChunkIndex] = pageChunk up.writableChunks[logicChunkIndex] = pageChunk
} }

View File

@@ -1,8 +1,9 @@
package page_writer package page_writer
import ( import (
"github.com/seaweedfs/seaweedfs/weed/util"
"testing" "testing"
"github.com/seaweedfs/seaweedfs/weed/util"
) )
func TestUploadPipeline(t *testing.T) { func TestUploadPipeline(t *testing.T) {

View File

@@ -128,11 +128,14 @@ func (wfs *WFS) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn)
// Perform the write // Perform the write
fhOut.dirtyPages.writerPattern.MonitorWriteAt(offsetOut, int(numBytesRead)) fhOut.dirtyPages.writerPattern.MonitorWriteAt(offsetOut, int(numBytesRead))
fhOut.dirtyPages.AddPage( if err := fhOut.dirtyPages.AddPage(
offsetOut, offsetOut,
buff[:numBytesRead], buff[:numBytesRead],
fhOut.dirtyPages.writerPattern.IsSequentialMode(), fhOut.dirtyPages.writerPattern.IsSequentialMode(),
nowUnixNano) nowUnixNano); err != nil {
glog.Errorf("AddPage error: %v", err)
return 0, fuse.EIO
}
// Accumulate for the next loop iteration // Accumulate for the next loop iteration
totalCopied += numBytesRead totalCopied += numBytesRead

View File

@@ -1,11 +1,13 @@
package mount package mount
import ( import (
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/seaweedfs/seaweedfs/weed/util"
"net/http" "net/http"
"syscall" "syscall"
"time" "time"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
) )
/** /**
@@ -72,7 +74,10 @@ func (wfs *WFS) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (wr
// glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data)) // glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data))
fh.dirtyPages.AddPage(offset, data, fh.dirtyPages.writerPattern.IsSequentialMode(), tsNs) if err := fh.dirtyPages.AddPage(offset, data, fh.dirtyPages.writerPattern.IsSequentialMode(), tsNs); err != nil {
glog.Errorf("AddPage error: %v", err)
return 0, fuse.EIO
}
written = uint32(len(data)) written = uint32(len(data))