Add -filerExcludePathPattern flag and fix nil panic in -filerExcludeFileName (#8756)

* Fix filerExcludeFileName to support directory names and path components

The original implementation only matched excludeFileName against
message.NewEntry.Name, which caused two issues:

1. Nil pointer panic on delete events (NewEntry is nil)
2. Files inside excluded directories were still backed up because
   the parent directory name was not checked

This patch:
- Checks all path components in resp.Directory against the regexp
- Adds nil guard for message.NewEntry before accessing .Name
- Also checks message.OldEntry.Name for rename/delete events

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add -filerExcludePathPattern flag and fix nil panic in filerExcludeFileName

Separate concerns between two exclude mechanisms:
- filerExcludeFileName: matches entry name only (leaf node)
- filerExcludePathPattern (NEW): matches any path component via regexp,
  so files inside matched directories are also excluded

Also fixes nil pointer panic when filerExcludeFileName encounters
delete events where NewEntry is nil.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Refactor exclude logic: per-side exclusion for rename events, reduce duplication

- Extract isEntryExcluded() to compute exclusion per old/new side,
  so rename events crossing an exclude boundary are handled as
  delete + create instead of being entirely skipped
- Extract compileExcludePattern() to deduplicate regexp compilation
- Replace strings.Split with allocation-free pathContainsMatch()
- Check message.NewParentPath (not just resp.Directory) for new side

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Move regexp compilation out of retry loop to fail fast on config errors

The compileExcludePattern calls for -filerExcludeFileName and -filerExcludePathPattern
are configuration-time validations that will never succeed on retry.
Move them to runFilerBackup before the reconnect loop and use glog.Fatalf
on failure, so invalid patterns are caught immediately at startup instead
of being retried every 1.7 seconds indefinitely.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add wildcard matching helpers for path and filename exclusion

* Replace regexp exclude patterns with wildcard-based flags, deprecate -filerExcludeFileName

Add -filerExcludeFileNames and -filerExcludePathPatterns flags that accept
comma-separated wildcard patterns (*, ?) using the existing wildcard library.
Mark -filerExcludeFileName as deprecated but keep its regexp behavior.

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Chris Lu <chris.lu@gmail.com>
This commit is contained in:
Jaehoon Kim
2026-03-27 02:04:06 +09:00
committed by GitHub
parent ccc662b90b
commit 6cf34f2376
2 changed files with 122 additions and 27 deletions

View File

@@ -15,22 +15,25 @@ import (
"github.com/seaweedfs/seaweedfs/weed/security" "github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/http" "github.com/seaweedfs/seaweedfs/weed/util/http"
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
"google.golang.org/grpc" "google.golang.org/grpc"
) )
type FilerBackupOptions struct { type FilerBackupOptions struct {
isActivePassive *bool isActivePassive *bool
filer *string filer *string
path *string path *string
excludePaths *string excludePaths *string
excludeFileName *string excludeFileName *string // deprecated: use excludeFileNames
debug *bool excludeFileNames *string
proxyByFiler *bool excludePathPatterns *string
doDeleteFiles *bool debug *bool
disableErrorRetry *bool proxyByFiler *bool
ignore404Error *bool doDeleteFiles *bool
timeAgo *time.Duration disableErrorRetry *bool
retentionDays *int ignore404Error *bool
timeAgo *time.Duration
retentionDays *int
} }
var ( var (
@@ -43,7 +46,9 @@ func init() {
filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster") filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer") filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer") filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "exclude file names that match the regexp to sync on filer") filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "[DEPRECATED: use -filerExcludeFileNames] exclude file names that match the regexp")
filerBackupOptions.excludeFileNames = cmdFilerBackup.Flag.String("filerExcludeFileNames", "", "comma-separated wildcard patterns to exclude file names, e.g., \"*.tmp,._*\"")
filerBackupOptions.excludePathPatterns = cmdFilerBackup.Flag.String("filerExcludePathPatterns", "", "comma-separated wildcard patterns to exclude paths where any component matches, e.g., \".snapshot,temp*\"")
filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers") filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination") filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files") filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
@@ -72,6 +77,15 @@ func runFilerBackup(cmd *Command, args []string) bool {
util.LoadSecurityConfiguration() util.LoadSecurityConfiguration()
util.LoadConfiguration("replication", true) util.LoadConfiguration("replication", true)
// Compile exclude patterns once before the retry loop — these are
// configuration errors and must not be retried.
reExcludeFileName, err := compileExcludePattern(*filerBackupOptions.excludeFileName, "exclude file name")
if err != nil {
glog.Fatalf("invalid -filerExcludeFileName: %v", err)
}
excludeFileNames := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludeFileNames)
excludePathPatterns := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludePathPatterns)
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
clientId := util.RandomInt32() clientId := util.RandomInt32()
@@ -79,7 +93,7 @@ func runFilerBackup(cmd *Command, args []string) bool {
for { for {
clientEpoch++ clientEpoch++
err := doFilerBackup(grpcDialOption, &filerBackupOptions, clientId, clientEpoch) err := doFilerBackup(grpcDialOption, &filerBackupOptions, reExcludeFileName, excludeFileNames, excludePathPatterns, clientId, clientEpoch)
if err != nil { if err != nil {
glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err) glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
time.Sleep(1747 * time.Millisecond) time.Sleep(1747 * time.Millisecond)
@@ -91,7 +105,7 @@ const (
BackupKeyPrefix = "backup." BackupKeyPrefix = "backup."
) )
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error { func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, clientId int32, clientEpoch int32) error {
// find data sink // find data sink
dataSink := findSink(util.GetViper()) dataSink := findSink(util.GetViper())
@@ -102,13 +116,6 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti
sourceFiler := pb.ServerAddress(*backupOption.filer) sourceFiler := pb.ServerAddress(*backupOption.filer)
sourcePath := *backupOption.path sourcePath := *backupOption.path
excludePaths := util.StringSplit(*backupOption.excludePaths, ",") excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
var reExcludeFileName *regexp.Regexp
if *backupOption.excludeFileName != "" {
var err error
if reExcludeFileName, err = regexp.Compile(*backupOption.excludeFileName); err != nil {
return fmt.Errorf("error compile regexp %v for exclude file name: %+v", *backupOption.excludeFileName, err)
}
}
timeAgo := *backupOption.timeAgo timeAgo := *backupOption.timeAgo
targetPath := dataSink.GetSinkToDirectory() targetPath := dataSink.GetSinkToDirectory()
debug := *backupOption.debug debug := *backupOption.debug
@@ -140,7 +147,7 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti
var processEventFn func(*filer_pb.SubscribeMetadataResponse) error var processEventFn func(*filer_pb.SubscribeMetadataResponse) error
if *backupOption.ignore404Error { if *backupOption.ignore404Error {
processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug) processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error { processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error {
err := processEventFnGenerated(resp) err := processEventFnGenerated(resp)
if err == nil { if err == nil {
@@ -153,7 +160,7 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti
return err return err
} }
} else { } else {
processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug) processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
} }
processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error { processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {

View File

@@ -21,6 +21,7 @@ import (
statsCollect "github.com/seaweedfs/seaweedfs/weed/stats" statsCollect "github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/grace" "github.com/seaweedfs/seaweedfs/weed/util/grace"
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
"google.golang.org/grpc" "google.golang.org/grpc"
) )
@@ -304,7 +305,7 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOpti
filerSink.SetChunkConcurrency(chunkConcurrency) filerSink.SetChunkConcurrency(chunkConcurrency)
filerSink.SetSourceFiler(filerSource) filerSink.SetSourceFiler(filerSource)
persistEventFn := genProcessFunction(sourcePath, targetPath, sourceExcludePaths, nil, filerSink, doDeleteFiles, debug) persistEventFn := genProcessFunction(sourcePath, targetPath, sourceExcludePaths, nil, nil, nil, filerSink, doDeleteFiles, debug)
processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error {
message := resp.EventNotification message := resp.EventNotification
@@ -439,7 +440,7 @@ func setOffset(grpcDialOption grpc.DialOption, filer pb.ServerAddress, signature
} }
func genProcessFunction(sourcePath string, targetPath string, excludePaths []string, reExcludeFileName *regexp.Regexp, dataSink sink.ReplicationSink, doDeleteFiles bool, debug bool) func(resp *filer_pb.SubscribeMetadataResponse) error { func genProcessFunction(sourcePath string, targetPath string, excludePaths []string, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, dataSink sink.ReplicationSink, doDeleteFiles bool, debug bool) func(resp *filer_pb.SubscribeMetadataResponse) error {
// process function // process function
processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error {
message := resp.EventNotification message := resp.EventNotification
@@ -468,9 +469,24 @@ func genProcessFunction(sourcePath string, targetPath string, excludePaths []str
return nil return nil
} }
} }
if reExcludeFileName != nil && reExcludeFileName.MatchString(message.NewEntry.Name) { // Compute per-side exclusion so that rename events crossing an
// exclude boundary are handled as delete + create rather than
// being entirely skipped.
oldExcluded := isEntryExcluded(resp.Directory, message.OldEntry, reExcludeFileName, excludeFileNames, excludePathPatterns)
newExcluded := isEntryExcluded(message.NewParentPath, message.NewEntry, reExcludeFileName, excludeFileNames, excludePathPatterns)
if oldExcluded && newExcluded {
return nil return nil
} }
if oldExcluded {
// Old side is excluded — treat as pure create of new entry.
message.OldEntry = nil
}
if newExcluded {
// New side is excluded — treat as pure delete of old entry.
message.NewEntry = nil
sourceNewKey = ""
}
if dataSink.IsIncremental() { if dataSink.IsIncremental() {
doDeleteFiles = false doDeleteFiles = false
} }
@@ -578,3 +594,75 @@ func buildKey(dataSink sink.ReplicationSink, message *filer_pb.EventNotification
return escapeKey(key) return escapeKey(key)
} }
// isEntryExcluded reports whether one side (old or new) of a metadata event
// should be skipped, based on the deprecated filename regexp, the wildcard
// file-name matchers, or the wildcard path-pattern matchers.
func isEntryExcluded(dir string, entry *filer_pb.Entry, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher) bool {
	if entry == nil {
		// A missing side (e.g. NewEntry on delete events) is never excluded.
		return false
	}
	name := entry.Name
	// Deprecated regexp-based exclusion on the leaf name.
	if reExcludeFileName != nil && reExcludeFileName.MatchString(name) {
		return true
	}
	// Wildcard-based exclusion on the leaf name only.
	if matchesAnyWildcard(excludeFileNames, name) {
		return true
	}
	if len(excludePathPatterns) == 0 {
		return false
	}
	// Wildcard-based path-pattern exclusion: both the entry name itself and
	// every directory component of dir are candidates for a match.
	return matchesAnyWildcard(excludePathPatterns, name) ||
		pathContainsWildcardMatch(dir, excludePathPatterns)
}
// compileExcludePattern compiles a regexp pattern string.
// An empty pattern means "no exclusion configured" and returns (nil, nil)
// rather than an error. The label only contextualizes failure messages.
func compileExcludePattern(pattern string, label string) (*regexp.Regexp, error) {
	if pattern == "" {
		return nil, nil
	}
	re, err := regexp.Compile(pattern)
	if err != nil {
		// Wrap with %w (instead of %+v) so callers can inspect the
		// underlying regexp error via errors.Is/errors.As; the rendered
		// message text is unchanged.
		return nil, fmt.Errorf("error compile regexp %v for %s: %w", pattern, label, err)
	}
	return re, nil
}
// matchesAnyWildcard reports whether at least one matcher accepts value.
// An empty (or all-nil) matcher list matches nothing — note this differs
// from wildcard.MatchesAnyWildcard, which treats an empty list as a match.
func matchesAnyWildcard(matchers []*wildcard.WildcardMatcher, value string) bool {
	for _, matcher := range matchers {
		if matcher == nil {
			continue
		}
		if matcher.Match(value) {
			return true
		}
	}
	return false
}
// pathContainsWildcardMatch reports whether any "/"-separated component of
// path matches one of the wildcard matchers. Empty components (from leading,
// trailing, or doubled slashes) are skipped, and no intermediate slice is
// allocated while walking the path.
func pathContainsWildcardMatch(path string, matchers []*wildcard.WildcardMatcher) bool {
	for remaining := path; remaining != ""; {
		component, rest, _ := strings.Cut(remaining, "/")
		remaining = rest
		if component != "" && matchesAnyWildcard(matchers, component) {
			return true
		}
	}
	return false
}