Files
seaweedFS/weed/command/scaffold/filer.toml
Chris Lu 5a03b5538f filer: improve FoundationDB performance by disabling batch by default (#7770)
* filer: improve FoundationDB performance by disabling batch by default

This PR addresses a performance issue where FoundationDB filer was achieving
only ~757 ops/sec with 12 concurrent S3 clients, despite FDB being capable
of 17,000+ ops/sec.

Root cause: The write batcher was waiting up to 5ms for each operation to
batch, even though S3 semantics require waiting for durability confirmation.
This added artificial latency that defeated the purpose of batching.

Changes:
- Disable write batching by default (batch_enabled = false)
- Each write now commits immediately in its own transaction
- Reduce batch interval from 5ms to 1ms when batching is enabled
- Add batch_enabled config option to toggle behavior
- Improve batcher to collect available ops without blocking
- Add benchmarks comparing batch vs no-batch performance

Benchmark results (16 concurrent goroutines):
- With batch:    2,924 ops/sec (342,032 ns/op)
- Without batch: 4,625 ops/sec (216,219 ns/op)
- Improvement:   +58% faster

Configuration:
- Default: batch_enabled = false (optimal for S3 PUT latency)
- For bulk ingestion: set batch_enabled = true

Also fixes ARM64 Docker test setup (shell compatibility, fdbserver path).

* fix: address review comments - use atomic counter and remove duplicate batcher

- Use sync/atomic.Uint64 for unique filenames in concurrent benchmarks
- Remove duplicate batcher creation in createBenchmarkStoreWithBatching
  (initialize() already creates batcher when batchEnabled=true)

* fix: add realistic default values to benchmark store helper

Set directoryPrefix, timeout, and maxRetryDelay to reasonable defaults
for more realistic benchmark conditions.
2025-12-15 13:03:34 -08:00

436 lines
13 KiB
TOML

# A sample TOML config file for SeaweedFS filer store
# Used with "weed filer" or "weed server -filer"
# Put this file to one of the location, with descending priority
# ./filer.toml
# $HOME/.seaweedfs/filer.toml
# /etc/seaweedfs/filer.toml
####################################################
# Customizable filer server options
####################################################
[filer.options]
# with http DELETE, by default the filer would check whether a folder is empty.
# recursive_delete will delete all sub folders and files, similar to "rm -Rf"
recursive_delete = false
#max_file_name_length = 255
####################################################
# The following are filer store options
####################################################
[leveldb2]
# local on disk, mostly for simple single-machine setup, fairly scalable
# faster than previous leveldb, recommended.
enabled = true
dir = "./filerldb2" # directory to store level db files
[leveldb3]
# similar to leveldb2.
# each bucket has its own meta store.
enabled = false
dir = "./filerldb3" # directory to store level db files
[rocksdb]
# local on disk, similar to leveldb
# since it is using a C wrapper, you need to install rocksdb and build it by yourself
enabled = false
dir = "./filerrdb" # directory to store rocksdb files
[sqlite]
# local on disk, similar to leveldb
enabled = false
dbFile = "./filer.db" # sqlite db file
[mysql] # or memsql, tidb
# CREATE TABLE IF NOT EXISTS `filemeta` (
# `dirhash` BIGINT NOT NULL COMMENT 'first 64 bits of MD5 hash value of directory field',
# `name` VARCHAR(766) NOT NULL COMMENT 'directory or file name',
# `directory` TEXT NOT NULL COMMENT 'full path to parent directory',
# `meta` LONGBLOB,
# PRIMARY KEY (`dirhash`, `name`)
# ) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
enabled = false
# dsn will take priority over "hostname, port, username, password, database".
# [username[:password]@][protocol[(address)]]/dbname[?param1=value1&...&paramN=valueN]
dsn = "root@tcp(localhost:3306)/seaweedfs?collation=utf8mb4_bin"
enable_tls = false
ca_crt = "" # ca.crt dir when enable_tls set true
client_crt = "" # mysql client.crt dir when enable_tls set true
client_key = "" # mysql client.key dir when enable_tls set true
hostname = "localhost"
port = 3306
username = "root"
password = ""
database = "" # create or use an existing database
connection_max_idle = 10
connection_max_open = 50
connection_max_lifetime_seconds = 300
interpolateParams = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`"""
[mysql2] # or memsql, tidb
enabled = false
createTable = """
CREATE TABLE IF NOT EXISTS `%s` (
`dirhash` BIGINT NOT NULL,
`name` VARCHAR(766) NOT NULL,
`directory` TEXT NOT NULL,
`meta` LONGBLOB,
PRIMARY KEY (`dirhash`, `name`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
"""
hostname = "localhost"
port = 3306
username = "root"
password = ""
database = "" # create or use an existing database
connection_max_idle = 10
connection_max_open = 50
connection_max_lifetime_seconds = 300
interpolateParams = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`"""
[postgres] # or cockroachdb, YugabyteDB
# CREATE TABLE IF NOT EXISTS filemeta (
# dirhash BIGINT,
# name VARCHAR(65535),
# directory VARCHAR(65535),
# meta bytea,
# PRIMARY KEY (dirhash, name)
# );
enabled = false
hostname = "localhost"
port = 5432
username = "postgres"
password = ""
database = "postgres" # create or use an existing database
schema = ""
sslmode = "disable"
# SSL certificate options for secure connections
# For sslmode=verify-full, uncomment and configure the following:
# sslcert = "/path/to/client.crt" # client certificate file
# sslkey = "/path/to/client.key" # client private key file
# sslrootcert = "/path/to/ca.crt" # CA certificate file
# sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional)
connection_max_idle = 10
connection_max_open = 50
connection_max_lifetime_seconds = 300
# Set to true when using PgBouncer connection pooler
pgbouncer_compatible = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """
INSERT INTO "%[1]s" (dirhash, name, directory, meta)
VALUES($1, $2, $3, $4)
ON CONFLICT (dirhash, name) DO UPDATE SET
directory=EXCLUDED.directory,
meta=EXCLUDED.meta
"""
[postgres2]
enabled = false
createTable = """
CREATE TABLE IF NOT EXISTS "%s" (
dirhash BIGINT,
name VARCHAR(65535),
directory VARCHAR(65535),
meta bytea,
PRIMARY KEY (dirhash, name)
);
"""
hostname = "localhost"
port = 5432
username = "postgres"
password = ""
database = "postgres" # create or use an existing database
schema = ""
sslmode = "disable"
# SSL certificate options for secure connections
# For sslmode=verify-full, uncomment and configure the following:
# sslcert = "/path/to/client.crt" # client certificate file
# sslkey = "/path/to/client.key" # client private key file
# sslrootcert = "/path/to/ca.crt" # CA certificate file
# sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional)
connection_max_idle = 10
connection_max_open = 50
connection_max_lifetime_seconds = 300
# Set to true when using PgBouncer connection pooler
pgbouncer_compatible = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """
INSERT INTO "%[1]s" (dirhash, name, directory, meta)
VALUES($1, $2, $3, $4)
ON CONFLICT (dirhash, name) DO UPDATE SET
directory=EXCLUDED.directory,
meta=EXCLUDED.meta
"""
[cassandra2]
# CREATE TABLE filemeta (
# dirhash bigint,
# directory varchar,
# name varchar,
# meta blob,
# PRIMARY KEY ((dirhash, directory), name)
# ) WITH CLUSTERING ORDER BY (name ASC);
enabled = false
keyspace = "seaweedfs"
hosts = [
"localhost:9042",
]
username = ""
password = ""
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []
# Name of the datacenter local to this filer, used as host selection fallback.
localDC = ""
# Gocql connection timeout, default: 600ms
connection_timeout_millisecond = 600
[hbase]
enabled = false
zkquorum = ""
table = "seaweedfs"
[redis2]
enabled = false
address = "localhost:6379"
username = ""
password = ""
database = 0
# prefix for filer redis keys
keyPrefix = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []
[redis2_sentinel]
enabled = false
addresses = ["172.22.12.7:26379","172.22.12.8:26379","172.22.12.9:26379"]
masterName = "master"
username = ""
password = ""
database = 0
# prefix for filer redis keys
keyPrefix = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
[redis_cluster2]
enabled = false
addresses = [
"localhost:30001",
"localhost:30002",
"localhost:30003",
"localhost:30004",
"localhost:30005",
"localhost:30006",
]
username = ""
password = ""
# prefix for filer redis keys
keyPrefix = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# allows reads from slave servers or the master, but all writes still go to the master
readOnly = false
# automatically use the closest Redis server for reads
routeByLatency = false
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []
# The following lua redis stores uses lua to ensure atomicity
[redis_lua]
enabled = false
address = "localhost:6379"
username = ""
password = ""
database = 0
# prefix for filer redis keys
keyPrefix = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []
[redis_lua_sentinel]
enabled = false
addresses = ["172.22.12.7:26379","172.22.12.8:26379","172.22.12.9:26379"]
masterName = "master"
username = ""
password = ""
database = 0
# prefix for filer redis keys
keyPrefix = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
[redis_lua_cluster]
enabled = false
addresses = [
"localhost:30001",
"localhost:30002",
"localhost:30003",
"localhost:30004",
"localhost:30005",
"localhost:30006",
]
username = ""
password = ""
# prefix for filer redis keys
keyPrefix = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# allows reads from slave servers or the master, but all writes still go to the master
readOnly = false
# automatically use the closest Redis server for reads
routeByLatency = false
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []
[etcd]
enabled = false
servers = "localhost:2379"
username = ""
password = ""
key_prefix = "seaweedfs."
timeout = "3s"
# Set the CA certificate path
tls_ca_file=""
# Set the client certificate path
tls_client_crt_file=""
# Set the client private key path
tls_client_key_file=""
[mongodb]
enabled = false
uri = "mongodb://localhost:27017"
username = ""
password = ""
ssl = false
ssl_ca_file = ""
ssl_cert_file = ""
ssl_key_file = ""
insecure_skip_verify = false
option_pool_size = 0
database = "seaweedfs"
[elastic7]
enabled = false
servers = [
"http://localhost1:9200",
"http://localhost2:9200",
"http://localhost3:9200",
]
username = ""
password = ""
sniff_enabled = false
healthcheck_enabled = false
# increase the value is recommend, be sure the value in Elastic is greater or equal here
index.max_result_window = 10000
[arangodb] # in development dont use it
enabled = false
db_name = "seaweedfs"
servers=["http://localhost:8529"] # list of servers to connect to
# only basic auth supported for now
username=""
password=""
# skip tls cert validation
insecure_skip_verify = true
[ydb] # https://ydb.tech/
enabled = false
dsn = "grpc://localhost:2136?database=/local"
prefix = "seaweedfs"
useBucketPrefix = true # Fast Bucket Deletion
poolSizeLimit = 50
dialTimeOut = 10
# Authenticate produced with one of next environment variables:
# YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS=<path/to/sa_key_file> — used service account key file by path
# YDB_ANONYMOUS_CREDENTIALS="1" — used for authenticate with anonymous access. Anonymous access needs for connect to testing YDB installation
# YDB_METADATA_CREDENTIALS="1" — used metadata service for authenticate to YDB from yandex cloud virtual machine or from yandex function
# YDB_ACCESS_TOKEN_CREDENTIALS=<access_token> — used for authenticate to YDB with short-life access token. For example, access token may be IAM token
##########################
##########################
# To add path-specific filer store:
#
# 1. Add a name following the store type separated by a dot ".". E.g., cassandra2.tmp
# 2. Add a location configuration. E.g., location = "/tmp/"
# 3. Copy and customize all other configurations.
# Make sure they are not the same if using the same store type!
# 4. Set enabled to true
#
# The following is just using redis as an example
##########################
[redis2.tmp]
enabled = false
location = "/tmp/"
address = "localhost:6379"
username = ""
password = ""
database = 1
keyPrefix = ""
[tikv]
enabled = false
# If you have many pd address, use ',' split then:
# pdaddrs = "pdhost1:2379, pdhost2:2379, pdhost3:2379"
pdaddrs = "localhost:2379"
# prefix for filer TiKV keys, useful for sharing a TiKV cluster with multiple seaweedfs clusters
keyPrefix = ""
# Enable 1PC
enable_1pc = false
# batch delete count, default 10000 in code
#batchdelete_count = 20000
# Set the CA certificate path
ca_path=""
# Set the certificate path
cert_path=""
# Set the private key path
key_path=""
# The name list used to verify the cn name
verify_cn=""
[foundationdb]
# FoundationDB provides ACID transactions and horizontal scalability.
# Requires: go build -tags foundationdb
enabled = false
cluster_file = "/etc/foundationdb/fdb.cluster"
# api_version = 740
# timeout = "5s"
# directory_prefix = "seaweedfs"
# For bulk ingestion, enable batching: batch_enabled = true
[tarantool]
address = "localhost:3301"
user = "guest"
password = ""
timeout = "5s"
maxReconnects = 1000