Files
seaweedFS/weed/command/mount.go
Chris Lu 4c36cd04d6 mount: add periodic metadata sync to protect chunks from orphan cleanup (#7700)
mount: add periodic metadata flush to protect chunks from orphan cleanup

When a file is opened via FUSE mount and written for a long time without
being closed, chunks are uploaded to volume servers but the file metadata
(containing chunk references) is only saved to the filer on file close.

If volume.fsck runs during this window, it may identify these chunks as
orphans (not referenced in filer metadata) and purge them, causing data loss.

This commit adds a background task that periodically flushes file metadata
for open files to the filer, ensuring chunk references are visible to
volume.fsck even before files are closed.

New option:
  -metadataFlushSeconds (default: 120)
    Interval in seconds for flushing dirty file metadata to filer.
    Set to 0 to disable.

Fixes: https://github.com/seaweedfs/seaweedfs/issues/7649
2025-12-10 12:45:04 -08:00

137 lines
6.9 KiB
Go

package command
import (
"os"
"time"
)
// MountOptions gathers the settings of the "weed mount" command. Almost every
// field is a pointer because it is filled with the value returned by a flag
// registration in init(); the trailing comment on each field names that flag.
type MountOptions struct {
	// Filer endpoint and local mount point.
	filer              *string // -filer: comma-separated weed filer location
	filerMountRootPath *string // -filer.path: remote path on the filer to mount
	dir                *string // -dir: local directory to mount onto
	dirAutoCreate      *bool   // -dirAutoCreate: auto create the mount directory

	// Placement of newly written files.
	collection      *string // -collection: collection to create the files in
	collectionQuota *int    // -collectionQuotaMB: quota for the collection
	replication     *string // -replication: e.g. 000, 001; empty lets the filer decide
	diskType        *string // -disk: hdd, ssd, or any tag
	ttlSec          *int    // -ttl: file ttl in seconds

	// Write buffering and concurrency limits.
	chunkSizeLimitMB  *int // -chunkSizeLimitMB: local write buffer size, also chunks large files
	concurrentWriters *int // -concurrentWriters: limit on concurrent goroutine writers
	concurrentReaders *int // -concurrentReaders: limit on concurrent chunk fetches for reads

	// Local caches.
	cacheMetaTtlSec    *int    // -cacheMetaTtlSec: metadata cache validity in seconds
	cacheDirForRead    *string // -cacheDir: cache directory for file chunks and meta data
	cacheDirForWrite   *string // -cacheDirWrite: buffers writes, mostly for large files
	cacheSizeMBForRead *int64  // -cacheCapacityMB: file chunk read cache capacity in MB

	dataCenter *string // -dataCenter: preferred data center for writes

	// Access control on the mounted file system.
	allowOthers *bool   // -allowOthers: let other users access the file system
	umaskString *string // -umask: octal umask, e.g. 022
	nonempty    *bool   // -nonempty: allow mounting over a non-empty directory

	volumeServerAccess *string // -volumeServerAccess: direct, publicUrl, or filerProxy

	// Local-to-filer id translation, comma-separated <local>:<filer> pairs.
	uidMap *string // -map.uid
	gidMap *string // -map.gid

	readOnly *bool // -readOnly: read only

	// Runtime profiling endpoint.
	debug     *bool // -debug: serve runtime profiling data over HTTP
	debugPort *int  // -debug.port: HTTP port for debugging

	localSocket  *string // -localSocket: defaults to /tmp/seaweedfs-mount-<mount_dir_hash>.sock
	disableXAttr *bool   // -disableXAttr: disable xattr

	// The next two fields are not bound to a flag in init().
	// NOTE(review): presumably populated by the mount launcher — confirm against callers.
	extraOptions   []string
	fuseCommandPid int // explicitly set to 0 in init()

	// Periodic metadata flush to protect against orphan chunk cleanup.
	metadataFlushSeconds *int // -metadataFlushSeconds: flush interval in seconds, 0 disables

	// RDMA acceleration options.
	rdmaEnabled       *bool   // -rdma.enabled: enable RDMA acceleration for reads
	rdmaSidecarAddr   *string // -rdma.sidecar: RDMA sidecar address
	rdmaFallback      *bool   // -rdma.fallback: fall back to HTTP when RDMA fails
	rdmaReadOnly      *bool   // -rdma.readOnly: use RDMA for reads only
	rdmaMaxConcurrent *int    // -rdma.maxConcurrent: max concurrent RDMA operations
	rdmaTimeoutMs     *int    // -rdma.timeoutMs: RDMA operation timeout in milliseconds
}
// mountOptions receives the value of every option flag registered on cmdMount
// by init().
var mountOptions MountOptions

// Flag values that live outside MountOptions; each is assigned in init().
var (
	mountCpuProfile    *string        // -cpuprofile: cpu profile output file
	mountMemProfile    *string        // -memprofile: memory profile output file
	mountReadRetryTime *time.Duration // -readRetryTime: maximum read retry wait time
)
// init attaches runMount to cmdMount and registers every "weed mount" flag on
// cmdMount.Flag, storing the returned value pointers in mountOptions and in
// the package-level profiling / retry variables.
func init() {
	cmdMount.Run = runMount // break init cycle

	// Short local handles for the destination struct and the most frequently
	// used flag constructors; they are bound to the same cmdMount.Flag set.
	opts := &mountOptions
	stringFlag := cmdMount.Flag.String
	boolFlag := cmdMount.Flag.Bool
	intFlag := cmdMount.Flag.Int

	// Filer endpoint and mount point.
	opts.filer = stringFlag("filer", "localhost:8888", "comma-separated weed filer location")
	opts.filerMountRootPath = stringFlag("filer.path", "/", "mount this remote path from filer server")
	opts.dir = stringFlag("dir", ".", "mount weed filer to this directory")
	opts.dirAutoCreate = boolFlag("dirAutoCreate", false, "auto create the directory to mount to")

	// Placement of newly written files.
	opts.collection = stringFlag("collection", "", "collection to create the files")
	opts.collectionQuota = intFlag("collectionQuotaMB", 0, "quota for the collection")
	opts.replication = stringFlag("replication", "", "replication(e.g. 000, 001) to create to files. If empty, let filer decide.")
	opts.diskType = stringFlag("disk", "", "[hdd|ssd|<tag>] hard drive or solid state drive or any tag")
	opts.ttlSec = intFlag("ttl", 0, "file ttl in seconds")

	// Buffering and concurrency limits.
	opts.chunkSizeLimitMB = intFlag("chunkSizeLimitMB", 2, "local write buffer size, also chunk large files")
	opts.concurrentWriters = intFlag("concurrentWriters", 32, "limit concurrent goroutine writers")
	opts.concurrentReaders = intFlag("concurrentReaders", 16, "limit concurrent chunk fetches for read operations")

	// Local caches.
	opts.cacheDirForRead = stringFlag("cacheDir", os.TempDir(), "local cache directory for file chunks and meta data")
	opts.cacheSizeMBForRead = cmdMount.Flag.Int64("cacheCapacityMB", 128, "file chunk read cache capacity in MB")
	opts.cacheDirForWrite = stringFlag("cacheDirWrite", "", "buffer writes mostly for large files")
	opts.cacheMetaTtlSec = intFlag("cacheMetaTtlSec", 60, "metadata cache validity seconds")

	// Access and identity settings.
	opts.dataCenter = stringFlag("dataCenter", "", "prefer to write to the data center")
	opts.allowOthers = boolFlag("allowOthers", true, "allows other users to access the file system")
	opts.umaskString = stringFlag("umask", "022", "octal umask, e.g., 022, 0111")
	opts.nonempty = boolFlag("nonempty", false, "allows the mounting over a non-empty directory")
	opts.volumeServerAccess = stringFlag("volumeServerAccess", "direct", "access volume servers by [direct|publicUrl|filerProxy]")
	opts.uidMap = stringFlag("map.uid", "", "map local uid to uid on filer, comma-separated <local_uid>:<filer_uid>")
	opts.gidMap = stringFlag("map.gid", "", "map local gid to gid on filer, comma-separated <local_gid>:<filer_gid>")
	opts.readOnly = boolFlag("readOnly", false, "read only")

	// Debugging and miscellaneous switches.
	opts.debug = boolFlag("debug", false, "serves runtime profiling data, e.g., http://localhost:<debug.port>/debug/pprof/goroutine?debug=2")
	opts.debugPort = intFlag("debug.port", 6061, "http port for debugging")
	opts.localSocket = stringFlag("localSocket", "", "default to /tmp/seaweedfs-mount-<mount_dir_hash>.sock")
	opts.disableXAttr = boolFlag("disableXAttr", false, "disable xattr")
	opts.fuseCommandPid = 0

	// Periodic metadata flush to protect against orphan chunk cleanup.
	opts.metadataFlushSeconds = intFlag("metadataFlushSeconds", 120, "periodically flush file metadata to filer in seconds (0 to disable). This protects chunks from being purged by volume.fsck for long-running writes")

	// RDMA acceleration flags.
	opts.rdmaEnabled = boolFlag("rdma.enabled", false, "enable RDMA acceleration for reads")
	opts.rdmaSidecarAddr = stringFlag("rdma.sidecar", "", "RDMA sidecar address (e.g., localhost:8081)")
	opts.rdmaFallback = boolFlag("rdma.fallback", true, "fallback to HTTP when RDMA fails")
	opts.rdmaReadOnly = boolFlag("rdma.readOnly", false, "use RDMA for reads only (writes use HTTP)")
	opts.rdmaMaxConcurrent = intFlag("rdma.maxConcurrent", 64, "max concurrent RDMA operations")
	opts.rdmaTimeoutMs = intFlag("rdma.timeoutMs", 5000, "RDMA operation timeout in milliseconds")

	// Profiling outputs and read retry budget (kept outside MountOptions).
	mountCpuProfile = stringFlag("cpuprofile", "", "cpu profile output file")
	mountMemProfile = stringFlag("memprofile", "", "memory profile output file")
	mountReadRetryTime = cmdMount.Flag.Duration("readRetryTime", 6*time.Second, "maximum read retry wait time")
}
// cmdMount describes the "mount" sub-command: its usage line, a one-line
// summary, and the long help text (prerequisites plus the RDMA options).
// Its Run field is not set here; init() assigns it to break an
// initialization cycle. The flags themselves are registered on
// cmdMount.Flag in init().
var cmdMount = &Command{
UsageLine: "mount -filer=localhost:8888 -dir=/some/dir",
Short: "mount weed filer to a directory as file system in userspace(FUSE)",
Long: `mount weed filer to userspace.
Pre-requisites:
1) have SeaweedFS master and volume servers running
2) have a "weed filer" running
These 2 requirements can be achieved with one command "weed server -filer=true"
This uses github.com/seaweedfs/fuse, which enables writing FUSE file systems on
Linux, and OS X.
On OS X, it requires OSXFUSE (https://osxfuse.github.io/).
RDMA Acceleration:
For ultra-fast reads, enable RDMA acceleration with an RDMA sidecar:
weed mount -filer=localhost:8888 -dir=/mnt/seaweedfs \
-rdma.enabled=true -rdma.sidecar=localhost:8081
RDMA Options:
-rdma.enabled=false Enable RDMA acceleration for reads
-rdma.sidecar="" RDMA sidecar address (required if enabled)
-rdma.fallback=true Fallback to HTTP when RDMA fails
-rdma.readOnly=false Use RDMA for reads only (writes use HTTP)
-rdma.maxConcurrent=64 Max concurrent RDMA operations
-rdma.timeoutMs=5000 RDMA operation timeout in milliseconds
`,
}