Files
seaweedFS/weed/mount/posix_file_lock.go
Chris Lu 805625d06e Add FUSE integration tests for POSIX file locking (#8752)
* Add FUSE integration tests for POSIX file locking

Test flock() and fcntl() advisory locks over the FUSE mount:
- Exclusive and shared flock with conflict detection
- flock upgrade (shared to exclusive) and release on close
- fcntl F_SETLK write lock conflicts and shared read locks
- fcntl F_GETLK conflict reporting on overlapping byte ranges
- Non-overlapping byte-range locks held independently
- F_SETLKW blocking until conflicting lock is released
- Lock release on file descriptor close
- Concurrent lock contention with multiple workers

* Fix review feedback in POSIX lock integration tests

- Assert specific EAGAIN error on fcntl lock conflicts instead of generic Error
- Use O_APPEND in concurrent contention test so workers append rather than overwrite
- Verify exact line count (numWorkers * writesPerWorker) after concurrent test
- Check unlock error in F_SETLKW blocking test goroutine

* Refactor fcntl tests to use subprocesses for inter-process semantics

POSIX fcntl locks use the process's files_struct as lock owner, so all
fds in the same process share the same owner and never conflict. This
caused the fcntl tests to silently pass without exercising lock conflicts.

Changes:
- Add TestFcntlLockHelper subprocess entry point with hold/try/getlk actions
- Add lockHolder with channel-based coordination (no scanner race)
- Rewrite all fcntl tests to run contenders in separate subprocesses
- Fix F_UNLCK int16 cast in GetLk assertion for type-safe comparison
- Fix concurrent test: use non-blocking flock with retry to avoid
  exhausting go-fuse server reader goroutines (blocking FUSE SETLKW
  can starve unlock request processing, causing deadlock)

flock tests remain same-process since flock uses per-struct-file owners.

* Fix misleading comment and error handling in lock test subprocess

- Fix comment: tryLockInSubprocess tests a subprocess, not the test process
- Distinguish EAGAIN/EACCES from unexpected errors in subprocess try mode
  so real failures aren't silently masked as lock conflicts

* Fix CI race in FcntlReleaseOnClose and increase flock retry budget

- FcntlReleaseOnClose: retry lock acquisition after subprocess exits
  since the FUSE server may not process Release immediately
- ConcurrentLockContention: increase retry limit from 500 to 3000
  (5s → 30s budget) to handle CI load

* Separate flock and fcntl locks in the in-memory lock table and clean them up through the right release path: PID for POSIX locks, lock owner for flock

* ReleasePosixOwner

* weed/mount: flush before releasing posix close owner

* weed/mount: keep woken lock waiters from losing inode state

* test/fuse: make blocking fcntl helper state explicit

* test/fuse: assert flock contention never overlaps

* test/fuse: stabilize concurrent lock contention check

* test/fuse: make concurrent contention writes deterministic

* weed/mount: retry synchronous metadata flushes
2026-03-24 11:43:25 -07:00

362 lines
10 KiB
Go

package mount
import (
"math"
"sort"
"sync"
"syscall"
"github.com/seaweedfs/go-fuse/v2/fuse"
)
// lockRange describes one byte-range lock. The same shape is used for entries
// held in an inode's lock list and for requests (lock, unlock, query) handed
// to the lock table. Start and End are both inclusive offsets.
type lockRange struct {
Start uint64 // first byte covered by the lock (inclusive)
End uint64 // last byte covered (inclusive); math.MaxUint64 means "to EOF"
Typ uint32 // syscall.F_RDLCK or syscall.F_WRLCK; requests may also carry syscall.F_UNLCK
Owner uint64 // FUSE lock owner (from LkIn.Owner); locks sharing an Owner never conflict with each other
Pid uint32 // PID of lock holder (for GetLk reporting)
// flock and fcntl locks have different ownership and close semantics.
// Keep them in separate namespaces inside the table: conflict detection,
// coalescing, and unlocking only pair up locks whose IsFlock values match.
IsFlock bool
}
// inodeLocks holds all locks for one inode plus a waiter queue for SetLkw.
// mu guards every other field; the helper functions that mutate this struct
// expect the caller to hold it.
type inodeLocks struct {
mu sync.Mutex
locks []lockRange // currently held locks, sorted by Start
waiters []*lockWaiter // blocked SetLkw callers
// wakeRefs is incremented for each waiter that is woken and decremented
// when that waiter retries or gives up. While it is non-zero the table
// entry for this inode is not cleaned up.
wakeRefs int // woken waiters still retrying on this inodeLocks
}
// lockWaiter represents a blocked SetLkw caller. One is queued on the inode
// each time a blocking request finds a conflicting lock.
type lockWaiter struct {
requested lockRange // the lock this waiter is trying to acquire; re-checked against held locks when deciding whom to wake
ch chan struct{} // closed when the waiter should re-check
// wakeRefHeld is set when the waiter is woken (together with an increment
// of inodeLocks.wakeRefs) and cleared by releaseWakeRef, so the reference
// is dropped at most once.
wakeRefHeld bool
}
// PosixLockTable is the per-mount POSIX lock manager.
// It maps inode numbers to their lock state. Entries are created on demand
// and removed again once they hold no locks and no waiters.
type PosixLockTable struct {
// mu guards the inodes map. When both this mutex and an inodeLocks.mu are
// needed, this one is acquired first.
mu sync.Mutex
inodes map[uint64]*inodeLocks // lock state keyed by inode number
}
// NewPosixLockTable returns an empty lock table. The inode map is allocated
// up front so entries can be registered without further initialization.
func NewPosixLockTable() *PosixLockTable {
	table := new(PosixLockTable)
	table.inodes = map[uint64]*inodeLocks{}
	return table
}
// getOrCreateInodeLocks looks up the lock state registered for inode and
// registers a fresh, empty one when the table has no entry yet. The table
// mutex is held for the whole lookup-or-insert.
func (plt *PosixLockTable) getOrCreateInodeLocks(inode uint64) *inodeLocks {
	plt.mu.Lock()
	defer plt.mu.Unlock()

	if existing, found := plt.inodes[inode]; found {
		return existing
	}
	created := new(inodeLocks)
	plt.inodes[inode] = created
	return created
}
// getInodeLocks looks up the lock state registered for inode without creating
// one. It returns nil when the table has no entry for that inode.
func (plt *PosixLockTable) getInodeLocks(inode uint64) *inodeLocks {
	plt.mu.Lock()
	il := plt.inodes[inode]
	plt.mu.Unlock()
	return il
}
// maybeCleanupInode drops the table entry for inode once il no longer carries
// any state: no held locks, no queued waiters, and no woken waiters that are
// still retrying against it (wakeRefs == 0).
//
// The entry is deleted only when the table still maps inode to il itself. A
// caller can arrive here holding an inodeLocks that a concurrent cleanup has
// already removed; by then the table may hold a newer inodeLocks for the same
// inode with live locks, and deleting by key alone would silently discard
// those locks.
//
// Caller must NOT hold il.mu. Both mutexes are acquired here, table first and
// inode second.
func (plt *PosixLockTable) maybeCleanupInode(inode uint64, il *inodeLocks) {
	plt.mu.Lock()
	defer plt.mu.Unlock()

	if plt.inodes[inode] != il {
		// il is not (or no longer) the registered entry; leave the table alone.
		return
	}

	il.mu.Lock()
	defer il.mu.Unlock()
	if len(il.locks) == 0 && len(il.waiters) == 0 && il.wakeRefs == 0 {
		delete(plt.inodes, inode)
	}
}
// rangesOverlap reports whether the inclusive ranges [aStart, aEnd] and
// [bStart, bEnd] share at least one offset.
func rangesOverlap(aStart, aEnd, bStart, bEnd uint64) bool {
	if aStart > bEnd {
		// a begins after b has already ended.
		return false
	}
	return bStart <= aEnd
}
// findConflict scans locks in order and returns the first one that blocks the
// proposed lock, together with true. A held lock is compatible with the
// proposal (and therefore skipped) when any of the following holds:
//   - they live in different namespaces (flock vs. fcntl),
//   - they belong to the same owner,
//   - their byte ranges do not overlap,
//   - both are read locks.
//
// When nothing blocks the proposal, the zero lockRange and false are returned.
func findConflict(locks []lockRange, proposed lockRange) (lockRange, bool) {
	for _, held := range locks {
		compatible := held.IsFlock != proposed.IsFlock ||
			held.Owner == proposed.Owner ||
			!rangesOverlap(held.Start, held.End, proposed.Start, proposed.End) ||
			(held.Typ == syscall.F_RDLCK && proposed.Typ == syscall.F_RDLCK)
		if !compatible {
			return held, true
		}
	}
	return lockRange{}, false
}
// insertAndCoalesce records lk in il.locks, rewriting the locks that the same
// owner already holds in the same namespace (flock vs. fcntl):
//   - an overlapping or directly adjacent lock of the same type is folded into
//     lk, which grows to cover both;
//   - an overlapping lock of a different type is cut back so that only the
//     parts outside lk survive (possibly one piece on each side).
//
// Locks of other owners or of the other namespace are left untouched; conflict
// checking is the caller's job. The resulting list is sorted by Start.
// Caller must hold il.mu.
func insertAndCoalesce(il *inodeLocks, lk lockRange) {
	const lastOffset = ^uint64(0)

	// absorb widens lk just enough to also cover held.
	absorb := func(held lockRange) {
		if held.Start < lk.Start {
			lk.Start = held.Start
		}
		if held.End > lk.End {
			lk.End = held.End
		}
	}

	var result []lockRange
	for _, held := range il.locks {
		if held.Owner != lk.Owner || held.IsFlock != lk.IsFlock {
			result = append(result, held)
			continue
		}

		sameType := held.Typ == lk.Typ
		switch {
		case rangesOverlap(held.Start, held.End, lk.Start, lk.End):
			if sameType {
				absorb(held)
			} else {
				// lk replaces the overlapped bytes; keep whatever sticks out.
				if held.Start < lk.Start {
					head := held
					head.End = lk.Start - 1
					result = append(result, head)
				}
				if held.End > lk.End {
					tail := held
					tail.Start = lk.End + 1
					result = append(result, tail)
				}
			}
		case sameType && ((held.End < lastOffset && held.End+1 == lk.Start) || (lk.End < lastOffset && lk.End+1 == held.Start)):
			// Touching ranges of the same type collapse into one lock. The
			// lastOffset guards keep the +1 from wrapping around.
			absorb(held)
		default:
			result = append(result, held)
		}
	}

	result = append(result, lk)
	sort.Slice(result, func(a, b int) bool {
		return result[a].Start < result[b].Start
	})
	il.locks = result
}
// removeLocks unlocks the inclusive range [start, end] for every held lock
// accepted by matches. A matching lock that lies entirely inside the range is
// dropped; one that extends beyond it is trimmed, leaving a piece before
// start and/or a piece after end. Non-matching locks and locks outside the
// range are kept as they are, and the relative order of the list is preserved.
// Caller must hold il.mu.
func removeLocks(il *inodeLocks, matches func(lockRange) bool, start, end uint64) {
	var remaining []lockRange
	for _, held := range il.locks {
		if matches(held) && rangesOverlap(held.Start, held.End, start, end) {
			if held.Start < start {
				// Bytes before the unlocked range stay locked.
				head := held
				head.End = start - 1
				remaining = append(remaining, head)
			}
			if held.End > end {
				// Bytes after the unlocked range stay locked.
				tail := held
				tail.Start = end + 1
				remaining = append(remaining, tail)
			}
			continue
		}
		remaining = append(remaining, held)
	}
	il.locks = remaining
}
// releaseMatching drops, across the whole file, every lock on inode that
// matches accepts. It then wakes waiters that have become eligible and lets
// the table discard the inode entry if nothing is left. An inode without an
// entry is a no-op.
func (plt *PosixLockTable) releaseMatching(inode uint64, matches func(lockRange) bool) {
	if il := plt.getInodeLocks(inode); il != nil {
		const lastOffset uint64 = math.MaxUint64

		il.mu.Lock()
		removeLocks(il, matches, 0, lastOffset)
		wakeEligibleWaiters(il)
		il.mu.Unlock()

		// Cleanup takes il.mu itself, so it has to run after the unlock above.
		plt.maybeCleanupInode(inode, il)
	}
}
// releaseWakeRef gives back the reference a woken waiter holds on il, which is
// what keeps the inodeLocks alive while that waiter retries SetLkw. It is a
// no-op for a nil waiter or one that holds no reference, so calling it more
// than once for the same waiter is harmless.
func releaseWakeRef(il *inodeLocks, waiter *lockWaiter) {
	if waiter != nil && waiter.wakeRefHeld {
		waiter.wakeRefHeld = false
		il.wakeRefs--
	}
}
// wakeEligibleWaiters wakes every blocked SetLkw caller whose request no
// longer conflicts with the locks currently held on il. Each woken waiter is
// removed from the queue, is marked as holding a wake reference (counted in
// il.wakeRefs), and has its channel closed. Waiters that still conflict stay
// queued in their original order, which avoids a thundering herd.
//
// The queue is filtered in place. Slots vacated at the end of the backing
// array are cleared so that woken waiters are not kept reachable through
// stale pointers.
// Caller must hold il.mu.
func wakeEligibleWaiters(il *inodeLocks) {
	queue := il.waiters
	stillBlocked := queue[:0]
	for _, w := range queue {
		if _, conflicted := findConflict(il.locks, w.requested); conflicted {
			stillBlocked = append(stillBlocked, w)
			continue
		}
		w.wakeRefHeld = true
		il.wakeRefs++
		close(w.ch)
	}
	for i := len(stillBlocked); i < len(queue); i++ {
		queue[i] = nil
	}
	il.waiters = stillBlocked
}
// removeWaiter takes w out of il.waiters, preserving the order of the other
// waiters. It does nothing when w is not queued (for example because it has
// already been woken and removed). The slot freed at the end of the backing
// array is cleared so the removed waiter is not kept reachable through it.
// Caller must hold il.mu.
func removeWaiter(il *inodeLocks, w *lockWaiter) {
	for i, existing := range il.waiters {
		if existing != w {
			continue
		}
		last := len(il.waiters) - 1
		copy(il.waiters[i:], il.waiters[i+1:])
		il.waiters[last] = nil
		il.waiters = il.waiters[:last]
		return
	}
}
// GetLk reports whether the proposed lock could be placed on inode right now.
// If a held lock conflicts, out.Lk receives that lock's Start, End, Typ and
// Pid. Otherwise only out.Lk.Typ is written, set to F_UNLCK. The lock table is
// never modified.
func (plt *PosixLockTable) GetLk(inode uint64, proposed lockRange, out *fuse.LkOut) {
	var (
		blocker lockRange
		blocked bool
	)
	if il := plt.getInodeLocks(inode); il != nil {
		il.mu.Lock()
		blocker, blocked = findConflict(il.locks, proposed)
		il.mu.Unlock()
	}

	if !blocked {
		out.Lk.Typ = syscall.F_UNLCK
		return
	}
	out.Lk.Start = blocker.Start
	out.Lk.End = blocker.End
	out.Lk.Typ = blocker.Typ
	out.Lk.Pid = blocker.Pid
}
// SetLk attempts a non-blocking lock or unlock.
//
// For unlock (lk.Typ == F_UNLCK): removes the owner's locks in the given range
// (within lk's namespace, flock or fcntl), wakes waiters that became eligible,
// and always returns fuse.OK.
//
// For lock: returns fuse.EAGAIN if a conflicting lock is held, otherwise
// records the lock and returns fuse.OK. Because the new lock may replace part
// of a lock the same owner already holds (for example a write lock downgraded
// to a read lock), blocked SetLkw callers are re-evaluated after the insert so
// that requests which have become compatible do not stay asleep.
func (plt *PosixLockTable) SetLk(inode uint64, lk lockRange) fuse.Status {
	if lk.Typ == syscall.F_UNLCK {
		il := plt.getInodeLocks(inode)
		if il == nil {
			return fuse.OK
		}
		il.mu.Lock()
		removeLocks(il, func(existing lockRange) bool {
			return existing.Owner == lk.Owner && existing.IsFlock == lk.IsFlock
		}, lk.Start, lk.End)
		wakeEligibleWaiters(il)
		il.mu.Unlock()
		plt.maybeCleanupInode(inode, il)
		return fuse.OK
	}

	// Acquire il.mu while still holding the table mutex, after confirming that
	// il is the entry currently registered for inode. Without this, a
	// concurrent unlock could empty il and remove it from the table between
	// the lookup and il.mu.Lock(); the lock would then be recorded in an
	// orphaned entry that no other caller can see. Taking the table mutex
	// before the inode mutex matches the order used by maybeCleanupInode.
	il := plt.getOrCreateInodeLocks(inode)
	plt.mu.Lock()
	for plt.inodes[inode] != il {
		plt.mu.Unlock()
		il = plt.getOrCreateInodeLocks(inode)
		plt.mu.Lock()
	}
	il.mu.Lock()
	plt.mu.Unlock()
	defer il.mu.Unlock()

	if _, found := findConflict(il.locks, lk); found {
		return fuse.EAGAIN
	}
	insertAndCoalesce(il, lk)
	wakeEligibleWaiters(il)
	return fuse.OK
}
// SetLkw attempts a blocking lock. It returns fuse.OK once the lock has been
// recorded, or fuse.EINTR if the cancel channel is closed while waiting.
// Unlock requests (lk.Typ == F_UNLCK) never block and are handed to SetLk.
//
// While a conflicting lock is held the caller is queued as a waiter on the
// inode and sleeps until it is woken to re-check, or until it is cancelled.
func (plt *PosixLockTable) SetLkw(inode uint64, lk lockRange, cancel <-chan struct{}) fuse.Status {
	if lk.Typ == syscall.F_UNLCK {
		return plt.SetLk(inode, lk)
	}

	// Acquire il.mu while still holding the table mutex, after confirming that
	// il is the entry currently registered for inode. Without this, a
	// concurrent unlock could empty il and remove it from the table between
	// the lookup and il.mu.Lock(); the lock (or the waiter) would then be
	// recorded in an orphaned entry that no other caller can see. Taking the
	// table mutex before the inode mutex matches the order used by
	// maybeCleanupInode. Once registered as a waiter, or woken with a wake
	// reference, the entry cannot be cleaned up, so later iterations can lock
	// il.mu directly.
	il := plt.getOrCreateInodeLocks(inode)
	plt.mu.Lock()
	for plt.inodes[inode] != il {
		plt.mu.Unlock()
		il = plt.getOrCreateInodeLocks(inode)
		plt.mu.Lock()
	}
	il.mu.Lock()
	plt.mu.Unlock()

	var waiter *lockWaiter
	for {
		// il.mu is held at the top of every iteration.
		releaseWakeRef(il, waiter)
		if _, found := findConflict(il.locks, lk); !found {
			insertAndCoalesce(il, lk)
			il.mu.Unlock()
			return fuse.OK
		}

		// Register waiter with the requested lock details for selective waking.
		waiter = &lockWaiter{requested: lk, ch: make(chan struct{})}
		il.waiters = append(il.waiters, waiter)
		il.mu.Unlock()

		// Block until woken or cancelled.
		select {
		case <-waiter.ch:
			// Woken: re-take the inode mutex and retry.
			il.mu.Lock()
		case <-cancel:
			// Request cancelled. The waiter may have been woken concurrently,
			// so drop its wake reference as well as its queue slot.
			il.mu.Lock()
			releaseWakeRef(il, waiter)
			removeWaiter(il, waiter)
			il.mu.Unlock()
			plt.maybeCleanupInode(inode, il)
			return fuse.EINTR
		}
	}
}
// ReleaseOwner drops every lock on inode that belongs to owner, flock and
// fcntl alike. Used for same-owner cleanup in tests and lock-table operations.
func (plt *PosixLockTable) ReleaseOwner(inode uint64, owner uint64) {
	ownedBy := func(held lockRange) bool {
		return held.Owner == owner
	}
	plt.releaseMatching(inode, ownedBy)
}
// ReleaseFlockOwner drops the flock locks that owner holds on inode when a
// file description is released; fcntl locks of the same owner are untouched.
// FUSE only provides LockOwner on RELEASE when FUSE_RELEASE_FLOCK_UNLOCK is set.
func (plt *PosixLockTable) ReleaseFlockOwner(inode uint64, owner uint64) {
	flockOwnedBy := func(held lockRange) bool {
		if !held.IsFlock {
			return false
		}
		return held.Owner == owner
	}
	plt.releaseMatching(inode, flockOwnedBy)
}
// ReleasePosixOwner drops the POSIX fcntl locks that a closing lock owner
// holds on inode; flock locks of the same owner are untouched.
// FUSE passes the closing fi->owner on FLUSH, which is the correct close-time
// identity for POSIX byte-range lock cleanup.
func (plt *PosixLockTable) ReleasePosixOwner(inode uint64, owner uint64) {
	fcntlOwnedBy := func(held lockRange) bool {
		if held.IsFlock {
			return false
		}
		return held.Owner == owner
	}
	plt.releaseMatching(inode, fcntlOwnedBy)
}