Add -filerExcludePathPattern flag and fix nil panic in -filerExcludeFileName (#8756)
* Fix filerExcludeFileName to support directory names and path components

  The original implementation only matched excludeFileName against message.NewEntry.Name, which caused two issues:
  1. Nil pointer panic on delete events (NewEntry is nil)
  2. Files inside excluded directories were still backed up because the parent directory name was not checked

  This patch:
  - Checks all path components in resp.Directory against the regexp
  - Adds nil guard for message.NewEntry before accessing .Name
  - Also checks message.OldEntry.Name for rename/delete events

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add -filerExcludePathPattern flag and fix nil panic in filerExcludeFileName

  Separate concerns between two exclude mechanisms:
  - filerExcludeFileName: matches entry name only (leaf node)
  - filerExcludePathPattern (NEW): matches any path component via regexp, so files inside matched directories are also excluded

  Also fixes nil pointer panic when filerExcludeFileName encounters delete events where NewEntry is nil.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Refactor exclude logic: per-side exclusion for rename events, reduce duplication

  - Extract isEntryExcluded() to compute exclusion per old/new side, so rename events crossing an exclude boundary are handled as delete + create instead of being entirely skipped
  - Extract compileExcludePattern() to deduplicate regexp compilation
  - Replace strings.Split with allocation-free pathContainsMatch()
  - Check message.NewParentPath (not just resp.Directory) for new side

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Move regexp compilation out of retry loop to fail fast on config errors

  The compileExcludePattern calls for -filerExcludeFileName and -filerExcludePathPattern are configuration-time validations that will never succeed on retry. Move them to runFilerBackup before the reconnect loop and use glog.Fatalf on failure, so invalid patterns are caught immediately at startup instead of being retried every 1.7 seconds indefinitely.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add wildcard matching helpers for path and filename exclusion

* Replace regexp exclude patterns with wildcard-based flags, deprecate -filerExcludeFileName

  Add -filerExcludeFileNames and -filerExcludePathPatterns flags that accept comma-separated wildcard patterns (*, ?) using the existing wildcard library. Mark -filerExcludeFileName as deprecated but keep its regexp behavior.

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Chris Lu <chris.lu@gmail.com>
This commit is contained in:
@@ -15,22 +15,25 @@ import (
|
||||
"github.com/seaweedfs/seaweedfs/weed/security"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util/http"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
type FilerBackupOptions struct {
|
||||
isActivePassive *bool
|
||||
filer *string
|
||||
path *string
|
||||
excludePaths *string
|
||||
excludeFileName *string
|
||||
debug *bool
|
||||
proxyByFiler *bool
|
||||
doDeleteFiles *bool
|
||||
disableErrorRetry *bool
|
||||
ignore404Error *bool
|
||||
timeAgo *time.Duration
|
||||
retentionDays *int
|
||||
isActivePassive *bool
|
||||
filer *string
|
||||
path *string
|
||||
excludePaths *string
|
||||
excludeFileName *string // deprecated: use excludeFileNames
|
||||
excludeFileNames *string
|
||||
excludePathPatterns *string
|
||||
debug *bool
|
||||
proxyByFiler *bool
|
||||
doDeleteFiles *bool
|
||||
disableErrorRetry *bool
|
||||
ignore404Error *bool
|
||||
timeAgo *time.Duration
|
||||
retentionDays *int
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -43,7 +46,9 @@ func init() {
|
||||
filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
|
||||
filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
|
||||
filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
|
||||
filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "exclude file names that match the regexp to sync on filer")
|
||||
filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "[DEPRECATED: use -filerExcludeFileNames] exclude file names that match the regexp")
|
||||
filerBackupOptions.excludeFileNames = cmdFilerBackup.Flag.String("filerExcludeFileNames", "", "comma-separated wildcard patterns to exclude file names, e.g., \"*.tmp,._*\"")
|
||||
filerBackupOptions.excludePathPatterns = cmdFilerBackup.Flag.String("filerExcludePathPatterns", "", "comma-separated wildcard patterns to exclude paths where any component matches, e.g., \".snapshot,temp*\"")
|
||||
filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
|
||||
filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
|
||||
filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
|
||||
@@ -72,6 +77,15 @@ func runFilerBackup(cmd *Command, args []string) bool {
|
||||
util.LoadSecurityConfiguration()
|
||||
util.LoadConfiguration("replication", true)
|
||||
|
||||
// Compile exclude patterns once before the retry loop — compile failures are
|
||||
// configuration errors and must not be retried.
|
||||
reExcludeFileName, err := compileExcludePattern(*filerBackupOptions.excludeFileName, "exclude file name")
|
||||
if err != nil {
|
||||
glog.Fatalf("invalid -filerExcludeFileName: %v", err)
|
||||
}
|
||||
excludeFileNames := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludeFileNames)
|
||||
excludePathPatterns := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludePathPatterns)
|
||||
|
||||
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
|
||||
|
||||
clientId := util.RandomInt32()
|
||||
@@ -79,7 +93,7 @@ func runFilerBackup(cmd *Command, args []string) bool {
|
||||
|
||||
for {
|
||||
clientEpoch++
|
||||
err := doFilerBackup(grpcDialOption, &filerBackupOptions, clientId, clientEpoch)
|
||||
err := doFilerBackup(grpcDialOption, &filerBackupOptions, reExcludeFileName, excludeFileNames, excludePathPatterns, clientId, clientEpoch)
|
||||
if err != nil {
|
||||
glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
|
||||
time.Sleep(1747 * time.Millisecond)
|
||||
@@ -91,7 +105,7 @@ const (
|
||||
BackupKeyPrefix = "backup."
|
||||
)
|
||||
|
||||
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error {
|
||||
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, clientId int32, clientEpoch int32) error {
|
||||
|
||||
// find data sink
|
||||
dataSink := findSink(util.GetViper())
|
||||
@@ -102,13 +116,6 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti
|
||||
sourceFiler := pb.ServerAddress(*backupOption.filer)
|
||||
sourcePath := *backupOption.path
|
||||
excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
|
||||
var reExcludeFileName *regexp.Regexp
|
||||
if *backupOption.excludeFileName != "" {
|
||||
var err error
|
||||
if reExcludeFileName, err = regexp.Compile(*backupOption.excludeFileName); err != nil {
|
||||
return fmt.Errorf("error compile regexp %v for exclude file name: %+v", *backupOption.excludeFileName, err)
|
||||
}
|
||||
}
|
||||
timeAgo := *backupOption.timeAgo
|
||||
targetPath := dataSink.GetSinkToDirectory()
|
||||
debug := *backupOption.debug
|
||||
@@ -140,7 +147,7 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti
|
||||
|
||||
var processEventFn func(*filer_pb.SubscribeMetadataResponse) error
|
||||
if *backupOption.ignore404Error {
|
||||
processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug)
|
||||
processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
|
||||
processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error {
|
||||
err := processEventFnGenerated(resp)
|
||||
if err == nil {
|
||||
@@ -153,7 +160,7 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug)
|
||||
processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
|
||||
}
|
||||
|
||||
processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {
|
||||
|
||||
Reference in New Issue
Block a user