filer.sync: support per-cluster mTLS with -a.security and -b.security (#8872)

* filer.sync: support per-cluster mTLS with -a.security and -b.security flags

When syncing between two clusters that use different certificate authorities,
a single security.toml cannot authenticate to both. Add -a.security and
-b.security flags so each filer can use its own security.toml for TLS.

Closes #8481

* security: fatal on failure to read explicitly provided security config

When -a.security or -b.security is specified, falling back to insecure
credentials on a read error would silently bypass mTLS. Exit with a fatal
error instead.

* fix(filer.sync): use source filer's fromTsMs flag in initOffsetFromTsMs

A→B was using bFromTsMs and B→A was using aFromTsMs — these were
swapped. Each path should seed the target's offset with the source
filer's starting timestamp.

* security: return error from LoadClientTLSFromFile, resolve relative PEM paths

Change LoadClientTLSFromFile to return (grpc.DialOption, error) so
callers can handle failures explicitly instead of a silent insecure
fallback. Resolve relative PEM paths (grpc.ca, grpc.client.cert,
grpc.client.key) against the config file's directory.
This commit is contained in:
Chris Lu
2026-04-01 11:05:43 -07:00
committed by GitHub
parent 44d5cb8f90
commit 8572aae403
3 changed files with 61 additions and 14 deletions

View File

@@ -53,6 +53,8 @@ type SyncOptions struct {
chunkConcurrency *int chunkConcurrency *int
aDoDeleteFiles *bool aDoDeleteFiles *bool
bDoDeleteFiles *bool bDoDeleteFiles *bool
aSecurity *string
bSecurity *string
clientId int32 clientId int32
clientEpoch atomic.Int32 clientEpoch atomic.Int32
debug *bool debug *bool
@@ -113,6 +115,8 @@ func init() {
syncOptions.metricsHttpPort = cmdFilerSynchronize.Flag.Int("metricsPort", 0, "metrics listen port") syncOptions.metricsHttpPort = cmdFilerSynchronize.Flag.Int("metricsPort", 0, "metrics listen port")
syncOptions.aDoDeleteFiles = cmdFilerSynchronize.Flag.Bool("a.doDeleteFiles", true, "delete and update files when synchronizing on filer A") syncOptions.aDoDeleteFiles = cmdFilerSynchronize.Flag.Bool("a.doDeleteFiles", true, "delete and update files when synchronizing on filer A")
syncOptions.bDoDeleteFiles = cmdFilerSynchronize.Flag.Bool("b.doDeleteFiles", true, "delete and update files when synchronizing on filer B") syncOptions.bDoDeleteFiles = cmdFilerSynchronize.Flag.Bool("b.doDeleteFiles", true, "delete and update files when synchronizing on filer B")
syncOptions.aSecurity = cmdFilerSynchronize.Flag.String("a.security", "", "security.toml file for filer A when clusters use different certificates")
syncOptions.bSecurity = cmdFilerSynchronize.Flag.String("b.security", "", "security.toml file for filer B when clusters use different certificates")
syncOptions.debug = cmdFilerSynchronize.Flag.Bool("debug", false, "serves runtime profiling data via pprof on the port specified by -debug.port") syncOptions.debug = cmdFilerSynchronize.Flag.Bool("debug", false, "serves runtime profiling data via pprof on the port specified by -debug.port")
syncOptions.debugPort = cmdFilerSynchronize.Flag.Int("debug.port", 6060, "http port for debugging") syncOptions.debugPort = cmdFilerSynchronize.Flag.Int("debug.port", 6060, "http port for debugging")
syncOptions.clientId = util.RandomInt32() syncOptions.clientId = util.RandomInt32()
@@ -144,6 +148,22 @@ func runFilerSynchronize(cmd *Command, args []string) bool {
util.LoadSecurityConfiguration() util.LoadSecurityConfiguration()
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
// per-filer TLS when clusters use different certificates
grpcDialOptionA := grpcDialOption
grpcDialOptionB := grpcDialOption
if *syncOptions.aSecurity != "" {
var err error
if grpcDialOptionA, err = security.LoadClientTLSFromFile(*syncOptions.aSecurity, "grpc.client"); err != nil {
glog.Fatalf("load security config for filer A: %v", err)
}
}
if *syncOptions.bSecurity != "" {
var err error
if grpcDialOptionB, err = security.LoadClientTLSFromFile(*syncOptions.bSecurity, "grpc.client"); err != nil {
glog.Fatalf("load security config for filer B: %v", err)
}
}
grace.SetupProfiling(*syncCpuProfile, *syncMemProfile) grace.SetupProfiling(*syncCpuProfile, *syncMemProfile)
filerA := pb.ServerAddress(*syncOptions.filerA) filerA := pb.ServerAddress(*syncOptions.filerA)
@@ -153,13 +173,13 @@ func runFilerSynchronize(cmd *Command, args []string) bool {
go statsCollect.StartMetricsServer(*syncOptions.metricsHttpIp, *syncOptions.metricsHttpPort) go statsCollect.StartMetricsServer(*syncOptions.metricsHttpIp, *syncOptions.metricsHttpPort)
// read a filer signature // read a filer signature
aFilerSignature, aFilerErr := replication.ReadFilerSignature(grpcDialOption, filerA) aFilerSignature, aFilerErr := replication.ReadFilerSignature(grpcDialOptionA, filerA)
if aFilerErr != nil { if aFilerErr != nil {
glog.Errorf("get filer 'a' signature %d error from %s to %s: %v", aFilerSignature, *syncOptions.filerA, *syncOptions.filerB, aFilerErr) glog.Errorf("get filer 'a' signature %d error from %s to %s: %v", aFilerSignature, *syncOptions.filerA, *syncOptions.filerB, aFilerErr)
return true return true
} }
// read b filer signature // read b filer signature
bFilerSignature, bFilerErr := replication.ReadFilerSignature(grpcDialOption, filerB) bFilerSignature, bFilerErr := replication.ReadFilerSignature(grpcDialOptionB, filerB)
if bFilerErr != nil { if bFilerErr != nil {
glog.Errorf("get filer 'b' signature %d error from %s to %s: %v", bFilerSignature, *syncOptions.filerA, *syncOptions.filerB, bFilerErr) glog.Errorf("get filer 'b' signature %d error from %s to %s: %v", bFilerSignature, *syncOptions.filerA, *syncOptions.filerB, bFilerErr)
return true return true
@@ -189,9 +209,9 @@ func runFilerSynchronize(cmd *Command, args []string) bool {
go func() { go func() {
// a->b // a->b
// set synchronization start timestamp to offset // set synchronization start timestamp to offset
initOffsetError := initOffsetFromTsMs(grpcDialOption, filerB, aFilerSignature, *syncOptions.bFromTsMs, getSignaturePrefixByPath(*syncOptions.aPath)) initOffsetError := initOffsetFromTsMs(grpcDialOptionB, filerB, aFilerSignature, *syncOptions.aFromTsMs, getSignaturePrefixByPath(*syncOptions.aPath))
if initOffsetError != nil { if initOffsetError != nil {
glog.Errorf("init offset from timestamp %d error from %s to %s: %v", *syncOptions.bFromTsMs, *syncOptions.filerA, *syncOptions.filerB, initOffsetError) glog.Errorf("init offset from timestamp %d error from %s to %s: %v", *syncOptions.aFromTsMs, *syncOptions.filerA, *syncOptions.filerB, initOffsetError)
os.Exit(2) os.Exit(2)
} }
for { for {
@@ -199,11 +219,12 @@ func runFilerSynchronize(cmd *Command, args []string) bool {
err := doSubscribeFilerMetaChanges( err := doSubscribeFilerMetaChanges(
syncOptions.clientId, syncOptions.clientId,
syncOptions.clientEpoch.Load(), syncOptions.clientEpoch.Load(),
grpcDialOption, grpcDialOptionA,
filerA, filerA,
*syncOptions.aPath, *syncOptions.aPath,
util.StringSplit(*syncOptions.aExcludePaths, ","), util.StringSplit(*syncOptions.aExcludePaths, ","),
*syncOptions.aProxyByFiler, *syncOptions.aProxyByFiler,
grpcDialOptionB,
filerB, filerB,
*syncOptions.bPath, *syncOptions.bPath,
*syncOptions.bReplication, *syncOptions.bReplication,
@@ -228,9 +249,9 @@ func runFilerSynchronize(cmd *Command, args []string) bool {
if !*syncOptions.isActivePassive { if !*syncOptions.isActivePassive {
// b->a // b->a
// set synchronization start timestamp to offset // set synchronization start timestamp to offset
initOffsetError := initOffsetFromTsMs(grpcDialOption, filerA, bFilerSignature, *syncOptions.aFromTsMs, getSignaturePrefixByPath(*syncOptions.bPath)) initOffsetError := initOffsetFromTsMs(grpcDialOptionA, filerA, bFilerSignature, *syncOptions.bFromTsMs, getSignaturePrefixByPath(*syncOptions.bPath))
if initOffsetError != nil { if initOffsetError != nil {
glog.Errorf("init offset from timestamp %d error from %s to %s: %v", *syncOptions.aFromTsMs, *syncOptions.filerB, *syncOptions.filerA, initOffsetError) glog.Errorf("init offset from timestamp %d error from %s to %s: %v", *syncOptions.bFromTsMs, *syncOptions.filerB, *syncOptions.filerA, initOffsetError)
os.Exit(2) os.Exit(2)
} }
go func() { go func() {
@@ -239,11 +260,12 @@ func runFilerSynchronize(cmd *Command, args []string) bool {
err := doSubscribeFilerMetaChanges( err := doSubscribeFilerMetaChanges(
syncOptions.clientId, syncOptions.clientId,
syncOptions.clientEpoch.Load(), syncOptions.clientEpoch.Load(),
grpcDialOption, grpcDialOptionB,
filerB, filerB,
*syncOptions.bPath, *syncOptions.bPath,
util.StringSplit(*syncOptions.bExcludePaths, ","), util.StringSplit(*syncOptions.bExcludePaths, ","),
*syncOptions.bProxyByFiler, *syncOptions.bProxyByFiler,
grpcDialOptionA,
filerA, filerA,
*syncOptions.aPath, *syncOptions.aPath,
*syncOptions.aReplication, *syncOptions.aReplication,
@@ -285,12 +307,12 @@ func initOffsetFromTsMs(grpcDialOption grpc.DialOption, targetFiler pb.ServerAdd
return nil return nil
} }
func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOption grpc.DialOption, sourceFiler pb.ServerAddress, sourcePath string, sourceExcludePaths []string, sourceReadChunkFromFiler bool, targetFiler pb.ServerAddress, targetPath string, func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, sourceGrpcDialOption grpc.DialOption, sourceFiler pb.ServerAddress, sourcePath string, sourceExcludePaths []string, sourceReadChunkFromFiler bool, targetGrpcDialOption grpc.DialOption, targetFiler pb.ServerAddress, targetPath string,
replicationStr, collection string, ttlSec int, sinkWriteChunkByFiler bool, diskType string, debug bool, concurrency int, chunkConcurrency int, doDeleteFiles bool, sourceFilerSignature int32, targetFilerSignature int32, statePtr *atomic.Pointer[syncState]) error { replicationStr, collection string, ttlSec int, sinkWriteChunkByFiler bool, diskType string, debug bool, concurrency int, chunkConcurrency int, doDeleteFiles bool, sourceFilerSignature int32, targetFilerSignature int32, statePtr *atomic.Pointer[syncState]) error {
// if first time, start from now // if first time, start from now
// if has previously synced, resume from that point of time // if has previously synced, resume from that point of time
sourceFilerOffsetTsNs, err := getOffset(grpcDialOption, targetFiler, getSignaturePrefixByPath(sourcePath), sourceFilerSignature) sourceFilerOffsetTsNs, err := getOffset(targetGrpcDialOption, targetFiler, getSignaturePrefixByPath(sourcePath), sourceFilerSignature)
if err != nil { if err != nil {
return err return err
} }
@@ -300,8 +322,9 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOpti
// create filer sink // create filer sink
filerSource := &source.FilerSource{} filerSource := &source.FilerSource{}
filerSource.DoInitialize(sourceFiler.ToHttpAddress(), sourceFiler.ToGrpcAddress(), sourcePath, sourceReadChunkFromFiler) filerSource.DoInitialize(sourceFiler.ToHttpAddress(), sourceFiler.ToGrpcAddress(), sourcePath, sourceReadChunkFromFiler)
filerSource.SetGrpcDialOption(sourceGrpcDialOption)
filerSink := &filersink.FilerSink{} filerSink := &filersink.FilerSink{}
filerSink.DoInitialize(targetFiler.ToHttpAddress(), targetFiler.ToGrpcAddress(), targetPath, replicationStr, collection, ttlSec, diskType, grpcDialOption, sinkWriteChunkByFiler) filerSink.DoInitialize(targetFiler.ToHttpAddress(), targetFiler.ToGrpcAddress(), targetPath, replicationStr, collection, ttlSec, diskType, targetGrpcDialOption, sinkWriteChunkByFiler)
filerSink.SetChunkConcurrency(chunkConcurrency) filerSink.SetChunkConcurrency(chunkConcurrency)
filerSink.SetSourceFiler(filerSource) filerSink.SetSourceFiler(filerSource)
@@ -328,7 +351,7 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOpti
if statePtr != nil { if statePtr != nil {
statePtr.Store(&syncState{ statePtr.Store(&syncState{
processor: processor, processor: processor,
grpcDialOption: grpcDialOption, grpcDialOption: targetGrpcDialOption,
targetFiler: targetFiler, targetFiler: targetFiler,
sourcePath: sourcePath, sourcePath: sourcePath,
sourceFilerSignature: sourceFilerSignature, sourceFilerSignature: sourceFilerSignature,
@@ -351,7 +374,7 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOpti
lastLogTsNs = now lastLogTsNs = now
// collect synchronous offset // collect synchronous offset
statsCollect.FilerSyncOffsetGauge.WithLabelValues(sourceFiler.String(), targetFiler.String(), clientName, sourcePath).Set(float64(offsetTsNs)) statsCollect.FilerSyncOffsetGauge.WithLabelValues(sourceFiler.String(), targetFiler.String(), clientName, sourcePath).Set(float64(offsetTsNs))
return setOffset(grpcDialOption, targetFiler, getSignaturePrefixByPath(sourcePath), sourceFilerSignature, offsetTsNs) return setOffset(targetGrpcDialOption, targetFiler, getSignaturePrefixByPath(sourcePath), sourceFilerSignature, offsetTsNs)
}) })
prefix := sourcePath prefix := sourcePath
@@ -372,7 +395,7 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOpti
EventErrorType: pb.RetryForeverOnError, EventErrorType: pb.RetryForeverOnError,
} }
return pb.FollowMetadata(sourceFiler, grpcDialOption, metadataFollowOption, processEventFnWithOffset) return pb.FollowMetadata(sourceFiler, sourceGrpcDialOption, metadataFollowOption, processEventFnWithOffset)
} }

View File

@@ -50,6 +50,10 @@ func (fs *FilerSource) DoInitialize(address, grpcAddress string, dir string, rea
return nil return nil
} }
// SetGrpcDialOption replaces the gRPC dial option stored on fs with option.
// NOTE(review): presumably this option is used for later gRPC calls made by
// the FilerSource — confirm against the methods that read fs.grpcDialOption.
func (fs *FilerSource) SetGrpcDialOption(option grpc.DialOption) {
	fs.grpcDialOption = option
}
func (fs *FilerSource) LookupFileId(ctx context.Context, part string) (fileUrls []string, err error) { func (fs *FilerSource) LookupFileId(ctx context.Context, part string) (fileUrls []string, err error) {
vid2Locations := make(map[string]*filer_pb.Locations) vid2Locations := make(map[string]*filer_pb.Locations)

View File

@@ -7,10 +7,13 @@ import (
"fmt" "fmt"
"net" "net"
"os" "os"
"path/filepath"
"slices" "slices"
"strings" "strings"
"time" "time"
"github.com/spf13/viper"
"github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc" "google.golang.org/grpc"
@@ -139,6 +142,23 @@ func LoadServerTLS(config *util.ViperProxy, component string) (grpc.ServerOption
return grpc.Creds(ta), nil return grpc.Creds(ta), nil
} }
// LoadClientTLSFromFile loads the security configuration stored at configFile
// and builds a gRPC client dial option for component from it.
//
// Relative PEM paths found under "grpc.ca", component+".cert" and
// component+".key" are resolved against the directory containing configFile,
// so the configuration works regardless of the process working directory.
//
// It returns a nil option and a non-nil error when viper cannot load
// configFile. The underlying error is wrapped with %w so callers can inspect
// it with errors.Is or errors.As.
//
// NOTE(review): only the config read failure is reported as an error here;
// the value produced by LoadClientTLS is returned as-is — confirm how
// LoadClientTLS behaves when the PEM files are missing or invalid.
func LoadClientTLSFromFile(configFile string, component string) (grpc.DialOption, error) {
	v := viper.New()
	v.SetConfigFile(configFile)
	if err := v.ReadInConfig(); err != nil {
		return nil, fmt.Errorf("failed to read security config %s: %w", configFile, err)
	}

	// Resolve relative PEM paths against the config file's directory.
	configDir := filepath.Dir(configFile)
	for _, key := range []string{"grpc.ca", component + ".cert", component + ".key"} {
		p := v.GetString(key)
		if p != "" && !filepath.IsAbs(p) {
			v.Set(key, filepath.Join(configDir, p))
		}
	}

	return LoadClientTLS(&util.ViperProxy{Viper: v}, component), nil
}
func LoadClientTLS(config *util.ViperProxy, component string) grpc.DialOption { func LoadClientTLS(config *util.ViperProxy, component string) grpc.DialOption {
if config == nil { if config == nil {
return grpc.WithTransportCredentials(insecure.NewCredentials()) return grpc.WithTransportCredentials(insecure.NewCredentials())