Files
seaweedFS/weed/mount/weedfs_file_sync.go
Chris Lu d5ee35c8df Fix S3 delete for non-empty directory markers (#8740)
* Fix S3 delete for non-empty directory markers

* Address review feedback on directory marker deletes

* Stabilize FUSE concurrent directory operations
2026-03-23 13:35:16 -07:00

222 lines
6.7 KiB
Go

package mount
import (
"context"
"fmt"
"syscall"
"time"
"github.com/seaweedfs/go-fuse/v2/fuse"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
/**
* Flush method
*
* This is called on each close() of the opened file.
*
* Since file descriptors can be duplicated (dup, dup2, fork), for
* one open call there may be many flush calls.
*
* Filesystems shouldn't assume that flush will always be called
* after some writes, or that it will be called at all.
*
* fi->fh will contain the value set by the open method, or will
* be undefined if the open method didn't set any value.
*
* NOTE: the name of the method is misleading, since (unlike
* fsync) the filesystem is not forced to flush pending writes.
* One reason to flush data is if the filesystem wants to return
* write errors during close. However, such use is non-portable
* because POSIX does not require [close] to wait for delayed I/O to
* complete.
*
* If the filesystem supports file locking operations (setlk,
* getlk) it should remove all locks belonging to 'fi->owner'.
*
* If this request is answered with an error code of ENOSYS,
* this is treated as success and future calls to flush() will
* succeed automatically without being sent to the filesystem
* process.
*
* Valid replies:
* fuse_reply_err
*
* @param req request handle
* @param ino the inode number
* @param fi file information
*
* [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
*/
// Flush handles a FUSE FLUSH request for the handle identified by in.Fh.
//
// It returns fuse.OK when no handle is registered for in.Fh, and otherwise
// returns whatever doFlush reports, with asynchronous flushing permitted.
func (wfs *WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status {
	if handle := wfs.GetHandle(FileHandleId(in.Fh)); handle != nil {
		// allowAsync=true: a close()-triggered flush may be deferred by doFlush.
		return wfs.doFlush(handle, in.Uid, in.Gid, true)
	}

	// A missing handle may simply have been released already;
	// FLUSH does not treat that as an error.
	return fuse.OK
}
/**
* Synchronize file contents
*
* If the datasync parameter is non-zero, then only the user data
* should be flushed, not the meta data.
*
* If this request is answered with an error code of ENOSYS,
* this is treated as success and future calls to fsync() will
* succeed automatically without being sent to the filesystem
* process.
*
* Valid replies:
* fuse_reply_err
*
* @param req request handle
* @param ino the inode number
* @param datasync flag indicating if only data should be flushed
* @param fi file information
*/
// Fsync handles a FUSE FSYNC request for the handle identified by in.Fh.
//
// It returns fuse.ENOENT when no handle is registered for in.Fh, and
// otherwise returns whatever doFlush reports with asynchronous flushing
// disabled.
func (wfs *WFS) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) {
	handle := wfs.GetHandle(FileHandleId(in.Fh))
	if handle != nil {
		// fsync is an explicit sync request, so the flush is never deferred
		// (allowAsync=false).
		return wfs.doFlush(handle, in.Uid, in.Gid, false)
	}
	return fuse.ENOENT
}
// doFlush flushes the dirty data and metadata of a single file handle.
//
// uid and gid are either recorded on the handle for a deferred flush or passed
// to flushMetadataToFiler on the synchronous path. allowAsync is true for
// close()-triggered flushes and false for fsync.
//
// Results:
//   - fuse.OK when the flush was deferred, when there is no dirty metadata
//     after the data flush, or when the metadata was written.
//   - ENOSPC when IsOverQuotaWithUncommitted reports true and there is dirty
//     metadata to write.
//   - fuse.EIO when flushing dirty pages or writing metadata fails.
func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32, allowAsync bool) fuse.Status {
	outOfSpace := fuse.Status(syscall.ENOSPC)

	// Everything below operates on this one handle.
	fullPath := fh.FullPath()
	fh.RememberPath(fullPath)
	parentDir, entryName := fullPath.DirAndName()

	glog.V(4).Infof("doFlush %s fh %d", fullPath, fh.fh)

	// With writebackCache on, a close()-triggered flush (not fsync) of a handle
	// with dirty metadata is only marked as pending here; the expensive upload
	// and metadata write are left for later so the caller (e.g. rsync) can move
	// on to the next file. POSIX does not require close() to wait for delayed
	// I/O to complete.
	deferUpload := allowAsync && wfs.option.WritebackCache && fh.dirtyMetadata
	if deferUpload {
		if wfs.IsOverQuotaWithUncommitted() {
			return outOfSpace
		}
		fh.asyncFlushPending = true
		fh.asyncFlushUid = uid
		fh.asyncFlushGid = gid
		glog.V(3).Infof("doFlush async deferred %s fh %d", fullPath, fh.fh)
		return fuse.OK
	}

	// Synchronous path: normal mode, fsync, or nothing dirty.
	fh.asyncFlushPending = false

	// The quota check includes uncommitted writes; when over quota the dirty
	// pages are not uploaded at all.
	overQuota := wfs.IsOverQuotaWithUncommitted()
	if !overQuota {
		if flushErr := fh.dirtyPages.FlushData(); flushErr != nil {
			glog.Errorf("%v doFlush: %v", fullPath, flushErr)
			return fuse.EIO
		}
	}

	// dirtyMetadata is inspected only after the data flush above.
	switch {
	case !fh.dirtyMetadata:
		return fuse.OK
	case overQuota:
		return outOfSpace
	}

	if metaErr := wfs.flushMetadataToFiler(fh, parentDir, entryName, uid, gid); metaErr != nil {
		glog.Errorf("%v fh %d flush: %v", fullPath, fh.fh, metaErr)
		return fuse.EIO
	}

	if IsDebugFileReadWrite {
		fh.mirrorFile.Sync()
	}
	return fuse.OK
}
// flushMetadataToFiler sends the file's chunk references and attributes to the filer.
// This is shared between the synchronous doFlush path and the async flush completion.
//
// dir and name identify the entry to write; name overrides the entry's current
// name. uid and gid are applied only where the entry's attributes hold a zero
// Uid/Gid. The handle's exclusive lock in fhLockTable is held for the whole
// call.
//
// On success fh.dirtyMetadata is cleared. On failure it is left untouched and
// the error from WithFilerClient is returned; a CreateEntry failure is wrapped
// with %w so callers can inspect the underlying cause with errors.Is/errors.As.
func (wfs *WFS) flushMetadataToFiler(fh *FileHandle, dir, name string, uid, gid uint32) error {
	fileFullPath := fh.FullPath()

	fhActiveLock := fh.wfs.fhLockTable.AcquireLock("doFlush", fh.fh, util.ExclusiveLock)
	defer fh.wfs.fhLockTable.ReleaseLock(fh.fh, fhActiveLock)

	err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		entry := fh.GetEntry()
		entry.Name = name // this flush may be just after a rename operation

		if entry.Attributes != nil {
			entry.Attributes.Mime = fh.contentType
			// Only fill in ownership that has not been set yet.
			if entry.Attributes.Uid == 0 {
				entry.Attributes.Uid = uid
			}
			if entry.Attributes.Gid == 0 {
				entry.Attributes.Gid = gid
			}
			// The modification time is stamped at flush time.
			entry.Attributes.Mtime = time.Now().Unix()
		}

		request := &filer_pb.CreateEntryRequest{
			Directory:                dir,
			Entry:                    entry.GetEntry(),
			Signatures:               []int32{wfs.signature},
			SkipCheckParentDirectory: true,
		}

		glog.V(4).Infof("%s set chunks: %v", fileFullPath, len(entry.GetChunks()))

		// Only the non-manifest chunks are compacted and possibly folded into
		// new manifests; existing manifest chunks are appended back unchanged.
		manifestChunks, nonManifestChunks := filer.SeparateManifestChunks(entry.GetChunks())
		chunks, _ := filer.CompactFileChunks(context.Background(), wfs.LookupFn(), nonManifestChunks)
		chunks, manifestErr := filer.MaybeManifestize(wfs.saveDataAsChunk(fileFullPath), chunks)
		if manifestErr != nil {
			// not good, but should be ok: the flush continues with the
			// chunks MaybeManifestize returned.
			glog.V(0).Infof("MaybeManifestize: %v", manifestErr)
		}
		entry.Chunks = append(chunks, manifestChunks...)

		// NOTE(review): by their names these helpers translate ids between the
		// local and filer views; the deferred call undoes the mapping once the
		// closure returns — confirm against their definitions.
		wfs.mapPbIdFromLocalToFiler(request.Entry)
		defer wfs.mapPbIdFromFilerToLocal(request.Entry)

		resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request)
		if err != nil {
			glog.Errorf("fh flush create %s: %v", fileFullPath, err)
			// %w keeps the underlying error reachable through the chain.
			return fmt.Errorf("fh flush create %s: %w", fileFullPath, err)
		}

		// Fall back to a locally built event when the response carries none.
		event := resp.GetMetadataEvent()
		if event == nil {
			event = metadataUpdateEvent(dir, request.Entry)
		}
		// Applying the event locally is best-effort: on failure the flush
		// still succeeds and the directory's children cache is invalidated.
		if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil {
			glog.Warningf("flush %s: best-effort metadata apply failed: %v", fileFullPath, applyErr)
			wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(dir))
		}

		return nil
	})

	if err == nil {
		fh.dirtyMetadata = false
	}

	return err
}