Files
seaweedFS/weed/command/mount.go
Chris Lu d48e1e1659 mount: improve read throughput with parallel chunk fetching (#7569)
* mount: improve read throughput with parallel chunk fetching

This addresses issue #7504 where a single weed mount FUSE instance
does not fully utilize node network bandwidth when reading large files.

Changes:
- Add -concurrentReaders mount option (default: 16) to control the
  maximum number of parallel chunk fetches during read operations
- Implement parallel section reading in ChunkGroup.ReadDataAt() using
  errgroup for better throughput when reading across multiple sections
- Enhance ReaderCache with MaybeCacheMany() to prefetch multiple chunks
  ahead in parallel during sequential reads (now prefetches 4 chunks)
- Increase ReaderCache limit dynamically based on concurrentReaders
  to support higher read parallelism

The bottleneck was that chunks were being read sequentially even when
they reside on different volume servers. By introducing parallel chunk
fetching, a single mount instance can now better saturate available
network bandwidth.

Fixes: #7504

* fmt

* Address review comments: make prefetch configurable, improve error handling

Changes:
1. Add DefaultPrefetchCount constant (4) to reader_at.go
2. Add GetPrefetchCount() method to ChunkGroup that derives prefetch count
   from concurrentReaders (1/4 ratio, min 1, max 8)
3. Pass prefetch count through NewChunkReaderAtFromClient
4. Fix error handling in readDataAtParallel to prioritize errgroup error
5. Update all callers to use DefaultPrefetchCount constant

For mount operations, prefetch scales with -concurrentReaders:
- concurrentReaders=16 (default) -> prefetch=4
- concurrentReaders=32 -> prefetch=8 (capped)
- concurrentReaders=4 -> prefetch=1

For non-mount paths (WebDAV, query engine, MQ), uses DefaultPrefetchCount.

* fmt

* Refactor: use variadic parameter instead of new function name

Use NewChunkGroup with optional concurrentReaders parameter instead of
creating a separate NewChunkGroupWithConcurrency function.

This maintains backward compatibility - existing callers without the
parameter get the default of 16 concurrent readers.

* Use explicit concurrentReaders parameter instead of variadic

* Refactor: use MaybeCache with count parameter instead of new MaybeCacheMany function

* Address nitpick review comments

- Add upper bound (128) on concurrentReaders to prevent excessive goroutine fan-out
- Cap readerCacheLimit at 256 accordingly
- Fix SetChunks: use Lock() instead of RLock() since we are writing to group.sections
2025-11-29 10:06:11 -08:00

131 lines
6.5 KiB
Go

package command
import (
"os"
"time"
)
type MountOptions struct {
filer *string
filerMountRootPath *string
dir *string
dirAutoCreate *bool
collection *string
collectionQuota *int
replication *string
diskType *string
ttlSec *int
chunkSizeLimitMB *int
concurrentWriters *int
concurrentReaders *int
cacheMetaTtlSec *int
cacheDirForRead *string
cacheDirForWrite *string
cacheSizeMBForRead *int64
dataCenter *string
allowOthers *bool
umaskString *string
nonempty *bool
volumeServerAccess *string
uidMap *string
gidMap *string
readOnly *bool
debug *bool
debugPort *int
localSocket *string
disableXAttr *bool
extraOptions []string
fuseCommandPid int
// RDMA acceleration options
rdmaEnabled *bool
rdmaSidecarAddr *string
rdmaFallback *bool
rdmaReadOnly *bool
rdmaMaxConcurrent *int
rdmaTimeoutMs *int
}
var (
mountOptions MountOptions
mountCpuProfile *string
mountMemProfile *string
mountReadRetryTime *time.Duration
)
func init() {
cmdMount.Run = runMount // break init cycle
mountOptions.filer = cmdMount.Flag.String("filer", "localhost:8888", "comma-separated weed filer location")
mountOptions.filerMountRootPath = cmdMount.Flag.String("filer.path", "/", "mount this remote path from filer server")
mountOptions.dir = cmdMount.Flag.String("dir", ".", "mount weed filer to this directory")
mountOptions.dirAutoCreate = cmdMount.Flag.Bool("dirAutoCreate", false, "auto create the directory to mount to")
mountOptions.collection = cmdMount.Flag.String("collection", "", "collection to create the files")
mountOptions.collectionQuota = cmdMount.Flag.Int("collectionQuotaMB", 0, "quota for the collection")
mountOptions.replication = cmdMount.Flag.String("replication", "", "replication(e.g. 000, 001) to create to files. If empty, let filer decide.")
mountOptions.diskType = cmdMount.Flag.String("disk", "", "[hdd|ssd|<tag>] hard drive or solid state drive or any tag")
mountOptions.ttlSec = cmdMount.Flag.Int("ttl", 0, "file ttl in seconds")
mountOptions.chunkSizeLimitMB = cmdMount.Flag.Int("chunkSizeLimitMB", 2, "local write buffer size, also chunk large files")
mountOptions.concurrentWriters = cmdMount.Flag.Int("concurrentWriters", 32, "limit concurrent goroutine writers")
mountOptions.concurrentReaders = cmdMount.Flag.Int("concurrentReaders", 16, "limit concurrent chunk fetches for read operations")
mountOptions.cacheDirForRead = cmdMount.Flag.String("cacheDir", os.TempDir(), "local cache directory for file chunks and meta data")
mountOptions.cacheSizeMBForRead = cmdMount.Flag.Int64("cacheCapacityMB", 128, "file chunk read cache capacity in MB")
mountOptions.cacheDirForWrite = cmdMount.Flag.String("cacheDirWrite", "", "buffer writes mostly for large files")
mountOptions.cacheMetaTtlSec = cmdMount.Flag.Int("cacheMetaTtlSec", 60, "metadata cache validity seconds")
mountOptions.dataCenter = cmdMount.Flag.String("dataCenter", "", "prefer to write to the data center")
mountOptions.allowOthers = cmdMount.Flag.Bool("allowOthers", true, "allows other users to access the file system")
mountOptions.umaskString = cmdMount.Flag.String("umask", "022", "octal umask, e.g., 022, 0111")
mountOptions.nonempty = cmdMount.Flag.Bool("nonempty", false, "allows the mounting over a non-empty directory")
mountOptions.volumeServerAccess = cmdMount.Flag.String("volumeServerAccess", "direct", "access volume servers by [direct|publicUrl|filerProxy]")
mountOptions.uidMap = cmdMount.Flag.String("map.uid", "", "map local uid to uid on filer, comma-separated <local_uid>:<filer_uid>")
mountOptions.gidMap = cmdMount.Flag.String("map.gid", "", "map local gid to gid on filer, comma-separated <local_gid>:<filer_gid>")
mountOptions.readOnly = cmdMount.Flag.Bool("readOnly", false, "read only")
mountOptions.debug = cmdMount.Flag.Bool("debug", false, "serves runtime profiling data, e.g., http://localhost:<debug.port>/debug/pprof/goroutine?debug=2")
mountOptions.debugPort = cmdMount.Flag.Int("debug.port", 6061, "http port for debugging")
mountOptions.localSocket = cmdMount.Flag.String("localSocket", "", "default to /tmp/seaweedfs-mount-<mount_dir_hash>.sock")
mountOptions.disableXAttr = cmdMount.Flag.Bool("disableXAttr", false, "disable xattr")
mountOptions.fuseCommandPid = 0
// RDMA acceleration flags
mountOptions.rdmaEnabled = cmdMount.Flag.Bool("rdma.enabled", false, "enable RDMA acceleration for reads")
mountOptions.rdmaSidecarAddr = cmdMount.Flag.String("rdma.sidecar", "", "RDMA sidecar address (e.g., localhost:8081)")
mountOptions.rdmaFallback = cmdMount.Flag.Bool("rdma.fallback", true, "fallback to HTTP when RDMA fails")
mountOptions.rdmaReadOnly = cmdMount.Flag.Bool("rdma.readOnly", false, "use RDMA for reads only (writes use HTTP)")
mountOptions.rdmaMaxConcurrent = cmdMount.Flag.Int("rdma.maxConcurrent", 64, "max concurrent RDMA operations")
mountOptions.rdmaTimeoutMs = cmdMount.Flag.Int("rdma.timeoutMs", 5000, "RDMA operation timeout in milliseconds")
mountCpuProfile = cmdMount.Flag.String("cpuprofile", "", "cpu profile output file")
mountMemProfile = cmdMount.Flag.String("memprofile", "", "memory profile output file")
mountReadRetryTime = cmdMount.Flag.Duration("readRetryTime", 6*time.Second, "maximum read retry wait time")
}
var cmdMount = &Command{
UsageLine: "mount -filer=localhost:8888 -dir=/some/dir",
Short: "mount weed filer to a directory as file system in userspace(FUSE)",
Long: `mount weed filer to userspace.
Pre-requisites:
1) have SeaweedFS master and volume servers running
2) have a "weed filer" running
These 2 requirements can be achieved with one command "weed server -filer=true"
This uses github.com/seaweedfs/fuse, which enables writing FUSE file systems on
Linux, and OS X.
On OS X, it requires OSXFUSE (https://osxfuse.github.io/).
RDMA Acceleration:
For ultra-fast reads, enable RDMA acceleration with an RDMA sidecar:
weed mount -filer=localhost:8888 -dir=/mnt/seaweedfs \
-rdma.enabled=true -rdma.sidecar=localhost:8081
RDMA Options:
-rdma.enabled=false Enable RDMA acceleration for reads
-rdma.sidecar="" RDMA sidecar address (required if enabled)
-rdma.fallback=true Fallback to HTTP when RDMA fails
-rdma.readOnly=false Use RDMA for reads only (writes use HTTP)
-rdma.maxConcurrent=64 Max concurrent RDMA operations
-rdma.timeoutMs=5000 RDMA operation timeout in milliseconds
`,
}