Files
seaweedFS/weed/command/scaffold/filer.toml
Chris Lu 5287d9f3e3 fix(tikv): replace DeleteRange with transaction-based batch deletes (#7557)
* fix(tikv): replace DeleteRange with transaction-based batch deletes

Fixes #7187

Problem:
TiKV's DeleteRange API is a RawKV operation that bypasses transaction
isolation. When SeaweedFS filer uses TiKV with txn client and another
service uses RawKV client on the same cluster, DeleteFolderChildren
can accidentally delete KV pairs from the RawKV client because
DeleteRange operates at the raw key level without respecting
transaction boundaries.

Reproduction:
1. SeaweedFS filer using TiKV txn client for metadata
2. Another service using rawkv client on same TiKV cluster
3. Filer performs batch file deletion via DeleteFolderChildren
4. Result: ~50% of rawkv client's KV pairs get deleted

Solution:
Replace client.DeleteRange() (RawKV API) with transactional batch
deletes using txn.Delete() within transactions. This ensures:
- Transaction isolation - operations respect TiKV's MVCC boundaries
- Keyspace separation - txn client and RawKV client stay isolated
- Proper key handling - keys are copied to avoid iterator reuse issues
- Batch processing - deletes batched (10K default) to manage memory

Changes:
1. Core data structure:
   - Removed deleteRangeConcurrency field
   - Added batchCommitSize field (configurable, default 10000)

2. DeleteFolderChildren rewrite:
   - Replaced DeleteRange with iterative batch deletes
   - Added proper transaction lifecycle management
   - Implemented key copying to avoid iterator buffer reuse
   - Added batching to prevent memory exhaustion

3. New deleteBatch helper:
   - Handles transaction creation and lifecycle
   - Batches deletes within single transaction
   - Properly commits/rolls back based on context

4. Context propagation:
   - Updated RunInTxn to accept context parameter
   - All RunInTxn call sites now pass context
   - Enables proper timeout/cancellation handling

5. Configuration:
   - Removed deleterange_concurrency setting
   - Added batchdelete_count setting (default 10000)

All critical review comments from PR #7188 have been addressed:
- Proper key copying with append([]byte(nil), key...)
- Conditional transaction rollback based on inContext flag
- Context propagation for commits
- Proper transaction lifecycle management
- Configurable batch size

Co-authored-by: giftz <giftz@users.noreply.github.com>

* fix: remove extra closing brace causing syntax error in tikv_store.go

---------

Co-authored-by: giftz <giftz@users.noreply.github.com>
2025-11-26 14:45:56 -08:00

406 lines
12 KiB
TOML

# A sample TOML config file for SeaweedFS filer store
# Used with "weed filer" or "weed server -filer"
# Put this file in one of the following locations, in descending priority:
# ./filer.toml
# $HOME/.seaweedfs/filer.toml
# /etc/seaweedfs/filer.toml
####################################################
# Customizable filer server options
####################################################
[filer.options]
# On HTTP DELETE, the filer by default checks whether the folder is empty.
# Setting recursive_delete = true deletes all sub-folders and files, similar to "rm -Rf".
recursive_delete = false
# Uncomment to set max_file_name_length explicitly.
#max_file_name_length = 255
####################################################
# The following are filer store options
####################################################
[leveldb2]
# Local on-disk store, mostly for simple single-machine setups; fairly scalable.
# Faster than the previous leveldb store; recommended.
# Enabled in this sample configuration.
enabled = true
dir = "./filerldb2" # directory to store level db files
[leveldb3]
# Similar to leveldb2, except that each bucket has its own meta store.
enabled = false
dir = "./filerldb3" # directory to store level db files
[rocksdb]
# Local on-disk store, similar to leveldb.
# It uses a C wrapper, so you need to install rocksdb and build it yourself.
enabled = false
dir = "./filerrdb" # directory to store rocksdb files
[sqlite]
# Local on-disk store, similar to leveldb.
enabled = false
dbFile = "./filer.db" # sqlite db file
[mysql] # or memsql, tidb
# Reference DDL for the `filemeta` table:
# CREATE TABLE IF NOT EXISTS `filemeta` (
# `dirhash` BIGINT NOT NULL COMMENT 'first 64 bits of MD5 hash value of directory field',
# `name` VARCHAR(766) NOT NULL COMMENT 'directory or file name',
# `directory` TEXT NOT NULL COMMENT 'full path to parent directory',
# `meta` LONGBLOB,
# PRIMARY KEY (`dirhash`, `name`)
# ) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
enabled = false
# dsn takes priority over "hostname, port, username, password, database".
# Format: [username[:password]@][protocol[(address)]]/dbname[?param1=value1&...&paramN=valueN]
dsn = "root@tcp(localhost:3306)/seaweedfs?collation=utf8mb4_bin"
enable_tls = false
ca_crt = "" # ca.crt dir when enable_tls is set to true
client_crt = "" # mysql client.crt dir when enable_tls is set to true
client_key = "" # mysql client.key dir when enable_tls is set to true
hostname = "localhost"
port = 3306
username = "root"
password = ""
database = "" # create or use an existing database
connection_max_idle = 2
connection_max_open = 100
connection_max_lifetime_seconds = 0
interpolateParams = false
# If insert/upsert is failing, you can disable upsert or change the query syntax to match your RDBMS:
enableUpsert = true
upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`"""
[mysql2] # or memsql, tidb
enabled = false
# DDL template; "%s" is a placeholder (presumably the table name — confirm against the store code).
createTable = """
CREATE TABLE IF NOT EXISTS `%s` (
`dirhash` BIGINT NOT NULL,
`name` VARCHAR(766) NOT NULL,
`directory` TEXT NOT NULL,
`meta` LONGBLOB,
PRIMARY KEY (`dirhash`, `name`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
"""
hostname = "localhost"
port = 3306
username = "root"
password = ""
database = "" # create or use an existing database
connection_max_idle = 2
connection_max_open = 100
connection_max_lifetime_seconds = 0
interpolateParams = false
# If insert/upsert is failing, you can disable upsert or change the query syntax to match your RDBMS:
enableUpsert = true
upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`"""
[postgres] # or cockroachdb, YugabyteDB
# Reference DDL for the filemeta table:
# CREATE TABLE IF NOT EXISTS filemeta (
# dirhash BIGINT,
# name VARCHAR(65535),
# directory VARCHAR(65535),
# meta bytea,
# PRIMARY KEY (dirhash, name)
# );
enabled = false
hostname = "localhost"
port = 5432
username = "postgres"
password = ""
database = "postgres" # create or use an existing database
schema = ""
sslmode = "disable"
# SSL certificate options for secure connections.
# For sslmode=verify-full, uncomment and configure the following:
# sslcert = "/path/to/client.crt" # client certificate file
# sslkey = "/path/to/client.key" # client private key file
# sslrootcert = "/path/to/ca.crt" # CA certificate file
# sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional)
connection_max_idle = 100
connection_max_open = 100
connection_max_lifetime_seconds = 0
# Set to true when using the PgBouncer connection pooler.
pgbouncer_compatible = false
# If insert/upsert is failing, you can disable upsert or change the query syntax to match your RDBMS:
enableUpsert = true
upsertQuery = """
INSERT INTO "%[1]s" (dirhash, name, directory, meta)
VALUES($1, $2, $3, $4)
ON CONFLICT (dirhash, name) DO UPDATE SET
directory=EXCLUDED.directory,
meta=EXCLUDED.meta
"""
[postgres2]
enabled = false
# DDL template; "%s" is a placeholder (presumably the table name — confirm against the store code).
createTable = """
CREATE TABLE IF NOT EXISTS "%s" (
dirhash BIGINT,
name VARCHAR(65535),
directory VARCHAR(65535),
meta bytea,
PRIMARY KEY (dirhash, name)
);
"""
hostname = "localhost"
port = 5432
username = "postgres"
password = ""
database = "postgres" # create or use an existing database
schema = ""
sslmode = "disable"
# SSL certificate options for secure connections.
# For sslmode=verify-full, uncomment and configure the following:
# sslcert = "/path/to/client.crt" # client certificate file
# sslkey = "/path/to/client.key" # client private key file
# sslrootcert = "/path/to/ca.crt" # CA certificate file
# sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional)
connection_max_idle = 100
connection_max_open = 100
connection_max_lifetime_seconds = 0
# Set to true when using the PgBouncer connection pooler.
pgbouncer_compatible = false
# If insert/upsert is failing, you can disable upsert or change the query syntax to match your RDBMS:
enableUpsert = true
upsertQuery = """
INSERT INTO "%[1]s" (dirhash, name, directory, meta)
VALUES($1, $2, $3, $4)
ON CONFLICT (dirhash, name) DO UPDATE SET
directory=EXCLUDED.directory,
meta=EXCLUDED.meta
"""
[cassandra2]
# Reference DDL for the filemeta table:
# CREATE TABLE filemeta (
# dirhash bigint,
# directory varchar,
# name varchar,
# meta blob,
# PRIMARY KEY ((dirhash, directory), name)
# ) WITH CLUSTERING ORDER BY (name ASC);
enabled = false
keyspace = "seaweedfs"
hosts = ["localhost:9042"]
username = ""
password = ""
# Changing this list changes the data layout. Only add new directories;
# removing or updating entries will cause data loss.
superLargeDirectories = []
# Name of the datacenter local to this filer, used as a host selection fallback.
localDC = ""
# Gocql connection timeout in milliseconds (default: 600).
connection_timeout_millisecond = 600
[hbase]
# HBase-backed filer store; disabled in this sample.
enabled = false
# NOTE(review): presumably the ZooKeeper quorum address list — confirm against the store code.
zkquorum = ""
table = "seaweedfs"
[redis2]
enabled = false
address = "localhost:6379"
password = ""
database = 0
# TLS settings; the three paths below are empty in this sample.
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# Changing this list changes the data layout. Only add new directories;
# removing or updating entries will cause data loss.
superLargeDirectories = []
[redis2_sentinel]
enabled = false
# Example addresses; replace with your own endpoints.
addresses = [
    "172.22.12.7:26379",
    "172.22.12.8:26379",
    "172.22.12.9:26379",
]
masterName = "master"
username = ""
password = ""
database = 0
# TLS settings; the three paths below are empty in this sample.
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
[redis_cluster2]
enabled = false
addresses = [
    "localhost:30001",
    "localhost:30002",
    "localhost:30003",
    "localhost:30004",
    "localhost:30005",
    "localhost:30006",
]
password = ""
# TLS settings; the three paths below are empty in this sample.
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# Allows reads from slave servers or the master; all writes still go to the master.
readOnly = false
# Automatically use the closest Redis server for reads.
routeByLatency = false
# Changing this list changes the data layout. Only add new directories;
# removing or updating entries will cause data loss.
superLargeDirectories = []
# The following Lua-based Redis stores use Lua to ensure atomicity
[redis_lua]
enabled = false
address = "localhost:6379"
password = ""
database = 0
# TLS settings; the three paths below are empty in this sample.
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# Changing this list changes the data layout. Only add new directories;
# removing or updating entries will cause data loss.
superLargeDirectories = []
[redis_lua_sentinel]
enabled = false
# Example addresses; replace with your own endpoints.
addresses = [
    "172.22.12.7:26379",
    "172.22.12.8:26379",
    "172.22.12.9:26379",
]
masterName = "master"
username = ""
password = ""
database = 0
# TLS settings; the three paths below are empty in this sample.
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
[redis_lua_cluster]
enabled = false
addresses = [
    "localhost:30001",
    "localhost:30002",
    "localhost:30003",
    "localhost:30004",
    "localhost:30005",
    "localhost:30006",
]
password = ""
# TLS settings; the three paths below are empty in this sample.
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# Allows reads from slave servers or the master; all writes still go to the master.
readOnly = false
# Automatically use the closest Redis server for reads.
routeByLatency = false
# Changing this list changes the data layout. Only add new directories;
# removing or updating entries will cause data loss.
superLargeDirectories = []
[etcd]
enabled = false
servers = "localhost:2379"
username = ""
password = ""
key_prefix = "seaweedfs."
timeout = "3s"
# Path to the CA certificate
tls_ca_file = ""
# Path to the client certificate
tls_client_crt_file = ""
# Path to the client private key
tls_client_key_file = ""
[mongodb]
# MongoDB-backed filer store; disabled in this sample.
enabled = false
uri = "mongodb://localhost:27017"
username = ""
password = ""
# TLS settings; the three file paths below are empty in this sample.
ssl = false
ssl_ca_file = ""
ssl_cert_file = ""
ssl_key_file = ""
insecure_skip_verify = false
# NOTE(review): 0 presumably leaves the pool size at the driver default — confirm.
option_pool_size = 0
database = "seaweedfs"
[elastic7]
enabled = false
servers = [
    "http://localhost1:9200",
    "http://localhost2:9200",
    "http://localhost3:9200",
]
username = ""
password = ""
sniff_enabled = false
healthcheck_enabled = false
# Increasing this value is recommended; make sure the value configured in
# Elastic is greater than or equal to the value here.
index.max_result_window = 10000
[arangodb] # in development, do not use it
enabled = false
db_name = "seaweedfs"
# List of servers to connect to
servers = ["http://localhost:8529"]
# Only basic auth is supported for now
username = ""
password = ""
# Skip TLS certificate validation
insecure_skip_verify = true
[ydb] # https://ydb.tech/
enabled = false
dsn = "grpc://localhost:2136?database=/local"
prefix = "seaweedfs"
useBucketPrefix = true # Fast Bucket Deletion
poolSizeLimit = 50
dialTimeOut = 10 # NOTE(review): unit is not stated here — presumably seconds; confirm
# Authentication is configured through one of the following environment variables:
# YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS=<path/to/sa_key_file> — use the service account key file at the given path
# YDB_ANONYMOUS_CREDENTIALS="1" — authenticate with anonymous access. Anonymous access is needed to connect to a testing YDB installation
# YDB_METADATA_CREDENTIALS="1" — use the metadata service to authenticate to YDB from a Yandex Cloud virtual machine or from a Yandex function
# YDB_ACCESS_TOKEN_CREDENTIALS=<access_token> — authenticate to YDB with a short-lived access token. For example, the access token may be an IAM token
##########################
##########################
# To add path-specific filer store:
#
# 1. Add a name following the store type separated by a dot ".". E.g., cassandra2.tmp
# 2. Add a location configuration. E.g., location = "/tmp/"
# 3. Copy and customize all other configurations.
# Make sure they are not the same if using the same store type!
# 4. Set enabled to true
#
# The following is just using redis as an example
##########################
[redis2.tmp]
# Example of a path-specific store: a redis2 store named "tmp" for the location "/tmp/".
enabled = false
location = "/tmp/"
address = "localhost:6379"
password = ""
# Uses database 1, unlike the [redis2] sample (database 0), so the two configurations are not identical.
database = 1
[tikv]
enabled = false
# If you have multiple PD addresses, separate them with ',':
# pdaddrs = "pdhost1:2379, pdhost2:2379, pdhost3:2379"
pdaddrs = "localhost:2379"
# Enable 1PC
enable_1pc = false
# Batch delete count; defaults to 10000 in code
#batchdelete_count = 20000
# Path to the CA certificate
ca_path = ""
# Path to the certificate
cert_path = ""
# Path to the private key
key_path = ""
# The name list used to verify the CN name
verify_cn = ""
[tarantool]
# Every other store table in this file carries an explicit "enabled" switch;
# it is spelled out here as well so it is obvious how to turn this store on.
enabled = false
address = "localhost:3301"
user = "guest"
password = ""
# Duration string, e.g. "5s".
timeout = "5s"
maxReconnects = 1000