* fix(tikv): replace DeleteRange with transaction-based batch deletes Fixes #7187 Problem: TiKV's DeleteRange API is a RawKV operation that bypasses transaction isolation. When SeaweedFS filer uses TiKV with txn client and another service uses RawKV client on the same cluster, DeleteFolderChildren can accidentally delete KV pairs from the RawKV client because DeleteRange operates at the raw key level without respecting transaction boundaries. Reproduction: 1. SeaweedFS filer using TiKV txn client for metadata 2. Another service using rawkv client on same TiKV cluster 3. Filer performs batch file deletion via DeleteFolderChildren 4. Result: ~50% of rawkv client's KV pairs get deleted Solution: Replace client.DeleteRange() (RawKV API) with transactional batch deletes using txn.Delete() within transactions. This ensures: - Transaction isolation - operations respect TiKV's MVCC boundaries - Keyspace separation - txn client and RawKV client stay isolated - Proper key handling - keys are copied to avoid iterator reuse issues - Batch processing - deletes batched (10K default) to manage memory Changes: 1. Core data structure: - Removed deleteRangeConcurrency field - Added batchCommitSize field (configurable, default 10000) 2. DeleteFolderChildren rewrite: - Replaced DeleteRange with iterative batch deletes - Added proper transaction lifecycle management - Implemented key copying to avoid iterator buffer reuse - Added batching to prevent memory exhaustion 3. New deleteBatch helper: - Handles transaction creation and lifecycle - Batches deletes within single transaction - Properly commits/rolls back based on context 4. Context propagation: - Updated RunInTxn to accept context parameter - All RunInTxn call sites now pass context - Enables proper timeout/cancellation handling 5. 
Configuration: - Removed deleterange_concurrency setting - Added batchdelete_count setting (default 10000) All critical review comments from PR #7188 have been addressed: - Proper key copying with append([]byte(nil), key...) - Conditional transaction rollback based on inContext flag - Context propagation for commits - Proper transaction lifecycle management - Configurable batch size Co-authored-by: giftz <giftz@users.noreply.github.com> * fix: remove extra closing brace causing syntax error in tikv_store.go --------- Co-authored-by: giftz <giftz@users.noreply.github.com>
406 lines
12 KiB
TOML
406 lines
12 KiB
TOML
# A sample TOML config file for SeaweedFS filer store
# Used with "weed filer" or "weed server -filer"
# Put this file in one of the following locations, in descending priority:
# ./filer.toml
# $HOME/.seaweedfs/filer.toml
# /etc/seaweedfs/filer.toml

####################################################
# Customizable filer server options
####################################################
[filer.options]
# On HTTP DELETE, the filer by default checks whether a folder is empty.
# recursive_delete deletes all sub folders and files, similar to "rm -Rf".
recursive_delete = false
#max_file_name_length = 255

####################################################
# The following are filer store options
####################################################

[leveldb2]
# local on disk, mostly for simple single-machine setup, fairly scalable
# faster than previous leveldb, recommended.
enabled = true
dir = "./filerldb2" # directory to store level db files

[leveldb3]
# similar to leveldb2.
# each bucket has its own meta store.
enabled = false
dir = "./filerldb3" # directory to store level db files

[rocksdb]
# local on disk, similar to leveldb
# since it is using a C wrapper, you need to install rocksdb and build it by yourself
enabled = false
dir = "./filerrdb" # directory to store rocksdb files

[sqlite]
# local on disk, similar to leveldb
enabled = false
dbFile = "./filer.db" # sqlite db file

[mysql] # or memsql, tidb
# CREATE TABLE IF NOT EXISTS `filemeta` (
# `dirhash` BIGINT NOT NULL COMMENT 'first 64 bits of MD5 hash value of directory field',
# `name` VARCHAR(766) NOT NULL COMMENT 'directory or file name',
# `directory` TEXT NOT NULL COMMENT 'full path to parent directory',
# `meta` LONGBLOB,
# PRIMARY KEY (`dirhash`, `name`)
# ) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

enabled = false
# dsn will take priority over "hostname, port, username, password, database".
# DSN format:
# [username[:password]@][protocol[(address)]]/dbname[?param1=value1&...&paramN=valueN]
dsn = "root@tcp(localhost:3306)/seaweedfs?collation=utf8mb4_bin"
enable_tls = false
ca_crt = "" # ca.crt dir when enable_tls set true
client_crt = "" # mysql client.crt dir when enable_tls set true
client_key = "" # mysql client.key dir when enable_tls set true
hostname = "localhost"
port = 3306
username = "root"
password = ""
database = "" # create or use an existing database
connection_max_idle = 2
connection_max_open = 100
connection_max_lifetime_seconds = 0
interpolateParams = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`"""

[mysql2] # or memsql, tidb
enabled = false
# Table-creation statement; `%s` is a placeholder for the table name.
createTable = """
CREATE TABLE IF NOT EXISTS `%s` (
`dirhash` BIGINT NOT NULL,
`name` VARCHAR(766) NOT NULL,
`directory` TEXT NOT NULL,
`meta` LONGBLOB,
PRIMARY KEY (`dirhash`, `name`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
"""
hostname = "localhost"
port = 3306
username = "root"
password = ""
database = "" # create or use an existing database
connection_max_idle = 2
connection_max_open = 100
connection_max_lifetime_seconds = 0
interpolateParams = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`"""

[postgres] # or cockroachdb, YugabyteDB
# CREATE TABLE IF NOT EXISTS filemeta (
# dirhash BIGINT,
# name VARCHAR(65535),
# directory VARCHAR(65535),
# meta bytea,
# PRIMARY KEY (dirhash, name)
# );
enabled = false
hostname = "localhost"
port = 5432
username = "postgres"
password = ""
database = "postgres" # create or use an existing database
schema = ""
sslmode = "disable"
# SSL certificate options for secure connections
# For sslmode=verify-full, uncomment and configure the following:
# sslcert = "/path/to/client.crt" # client certificate file
# sslkey = "/path/to/client.key" # client private key file
# sslrootcert = "/path/to/ca.crt" # CA certificate file
# sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional)
connection_max_idle = 100
connection_max_open = 100
connection_max_lifetime_seconds = 0
# Set to true when using PgBouncer connection pooler
pgbouncer_compatible = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """
INSERT INTO "%[1]s" (dirhash, name, directory, meta)
VALUES($1, $2, $3, $4)
ON CONFLICT (dirhash, name) DO UPDATE SET
directory=EXCLUDED.directory,
meta=EXCLUDED.meta
"""

[postgres2]
enabled = false
# Table-creation statement; "%s" is a placeholder for the table name.
createTable = """
CREATE TABLE IF NOT EXISTS "%s" (
dirhash BIGINT,
name VARCHAR(65535),
directory VARCHAR(65535),
meta bytea,
PRIMARY KEY (dirhash, name)
);
"""
hostname = "localhost"
port = 5432
username = "postgres"
password = ""
database = "postgres" # create or use an existing database
schema = ""
sslmode = "disable"
# SSL certificate options for secure connections
# For sslmode=verify-full, uncomment and configure the following:
# sslcert = "/path/to/client.crt" # client certificate file
# sslkey = "/path/to/client.key" # client private key file
# sslrootcert = "/path/to/ca.crt" # CA certificate file
# sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional)
connection_max_idle = 100
connection_max_open = 100
connection_max_lifetime_seconds = 0
# Set to true when using PgBouncer connection pooler
pgbouncer_compatible = false
# if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax:
enableUpsert = true
upsertQuery = """
INSERT INTO "%[1]s" (dirhash, name, directory, meta)
VALUES($1, $2, $3, $4)
ON CONFLICT (dirhash, name) DO UPDATE SET
directory=EXCLUDED.directory,
meta=EXCLUDED.meta
"""

[cassandra2]
# CREATE TABLE filemeta (
# dirhash bigint,
# directory varchar,
# name varchar,
# meta blob,
# PRIMARY KEY ((dirhash, directory), name)
# ) WITH CLUSTERING ORDER BY (name ASC);
enabled = false
keyspace = "seaweedfs"
hosts = [
    "localhost:9042",
]
username = ""
password = ""
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []
# Name of the datacenter local to this filer, used as host selection fallback.
localDC = ""
# Gocql connection timeout, default: 600ms
connection_timeout_millisecond = 600

[hbase]
enabled = false
zkquorum = ""
table = "seaweedfs"

[redis2]
enabled = false
address = "localhost:6379"
password = ""
database = 0
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []

[redis2_sentinel]
enabled = false
# Sample sentinel addresses; replace with your own.
addresses = ["172.22.12.7:26379","172.22.12.8:26379","172.22.12.9:26379"]
masterName = "master"
username = ""
password = ""
database = 0
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""

[redis_cluster2]
enabled = false
addresses = [
    "localhost:30001",
    "localhost:30002",
    "localhost:30003",
    "localhost:30004",
    "localhost:30005",
    "localhost:30006",
]
password = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# allows reads from slave servers or the master, but all writes still go to the master
readOnly = false
# automatically use the closest Redis server for reads
routeByLatency = false
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []

# The following redis_lua* stores use Lua to ensure atomicity
[redis_lua]
enabled = false
address = "localhost:6379"
password = ""
database = 0
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []

[redis_lua_sentinel]
enabled = false
# Sample sentinel addresses; replace with your own.
addresses = ["172.22.12.7:26379","172.22.12.8:26379","172.22.12.9:26379"]
masterName = "master"
username = ""
password = ""
database = 0
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""

[redis_lua_cluster]
enabled = false
addresses = [
    "localhost:30001",
    "localhost:30002",
    "localhost:30003",
    "localhost:30004",
    "localhost:30005",
    "localhost:30006",
]
password = ""
enable_tls = false
ca_cert_path = ""
client_cert_path = ""
client_key_path = ""
# allows reads from slave servers or the master, but all writes still go to the master
readOnly = false
# automatically use the closest Redis server for reads
routeByLatency = false
# This changes the data layout. Only add new directories. Removing/Updating will cause data loss.
superLargeDirectories = []

[etcd]
enabled = false
servers = "localhost:2379"
username = ""
password = ""
key_prefix = "seaweedfs."
timeout = "3s"
# Set the CA certificate path
tls_ca_file = ""
# Set the client certificate path
tls_client_crt_file = ""
# Set the client private key path
tls_client_key_file = ""

[mongodb]
enabled = false
uri = "mongodb://localhost:27017"
username = ""
password = ""
ssl = false
ssl_ca_file = ""
ssl_cert_file = ""
ssl_key_file = ""
insecure_skip_verify = false
option_pool_size = 0
database = "seaweedfs"

[elastic7]
enabled = false
servers = [
    "http://localhost1:9200",
    "http://localhost2:9200",
    "http://localhost3:9200",
]
username = ""
password = ""
sniff_enabled = false
healthcheck_enabled = false
# Increasing this value is recommended; make sure the value configured in
# Elastic is greater than or equal to the value here.
index.max_result_window = 10000

[arangodb] # in development, do not use it
enabled = false
db_name = "seaweedfs"
servers = ["http://localhost:8529"] # list of servers to connect to
# only basic auth supported for now
username = ""
password = ""
# skip tls cert validation
insecure_skip_verify = true

[ydb] # https://ydb.tech/
enabled = false
dsn = "grpc://localhost:2136?database=/local"
prefix = "seaweedfs"
useBucketPrefix = true # Fast Bucket Deletion
poolSizeLimit = 50
dialTimeOut = 10

# Authentication is configured with one of the following environment variables:
# YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS=<path/to/sa_key_file> — use the service account key file at the given path
# YDB_ANONYMOUS_CREDENTIALS="1" — use anonymous access; needed to connect to a testing YDB installation
# YDB_METADATA_CREDENTIALS="1" — use the metadata service to authenticate to YDB from a Yandex Cloud virtual machine or function
# YDB_ACCESS_TOKEN_CREDENTIALS=<access_token> — use a short-lived access token, for example an IAM token

##########################
##########################
# To add path-specific filer store:
#
# 1. Add a name following the store type separated by a dot ".". E.g., cassandra2.tmp
# 2. Add a location configuration. E.g., location = "/tmp/"
# 3. Copy and customize all other configurations.
# Make sure they are not the same if using the same store type!
# 4. Set enabled to true
#
# The following is just using redis as an example
##########################
[redis2.tmp]
enabled = false
location = "/tmp/"
address = "localhost:6379"
password = ""
database = 1

[tikv]
enabled = false
# If you have multiple PD addresses, separate them with ',':
# pdaddrs = "pdhost1:2379, pdhost2:2379, pdhost3:2379"
pdaddrs = "localhost:2379"
# Enable 1PC
enable_1pc = false
# batch delete count, default 10000 in code
#batchdelete_count = 20000

# Set the CA certificate path
ca_path = ""
# Set the certificate path
cert_path = ""
# Set the private key path
key_path = ""
# The name list used to verify the cn name
verify_cn = ""

[tarantool]
# Explicitly disabled, matching every other store table in this file.
enabled = false
address = "localhost:3301"
user = "guest"
password = ""
timeout = "5s"
maxReconnects = 1000