Files
seaweedFS/weed/shell/common.go
Chris Lu 0a46577700 Fix #8040: Support '_default' keyword in collectionPattern to match default collection (#8046)
* Fix #8040: Support 'default' keyword in collectionPattern to match default collection

The default collection in SeaweedFS is represented as an empty string internally.
Previously, it was impossible to specifically target only the default collection
because:
- Empty collectionPattern matched ALL collections (filter was skipped)
- Using collectionPattern="default" tried to match the literal string "default"

This commit adds special handling for the keyword "default" in collectionPattern
across multiple shell commands:
- volume.tier.move
- volume.list
- volume.fix.replication
- volume.configure.replication

Now users can use -collectionPattern="default" to specifically target volumes
in the default collection (empty collection name), while maintaining backward
compatibility where empty pattern matches all collections.

Updated help text to document this feature.

* Update compileCollectionPattern to support 'default' keyword

This extends the fix to all commands that use regex-based collection
pattern matching:
- ec.encode
- ec.decode
- volume.tier.download
- volume.balance

The compileCollectionPattern function now treats "default" as a special
keyword that compiles to the regex "^$" (matching empty strings), making
it consistent with the other commands that use filepath.Match.

* Use CollectionDefault constant instead of hardcoded "default" string

Refactored the collection pattern matching logic to use a central constant
CollectionDefault defined in weed/shell/common.go. This improves maintainability
and ensures consistency across all shell commands.

* Address PR review feedback: simplify logic and use '_default' keyword

Changes:
1. Changed CollectionDefault from "default" to "_default" to avoid collision
   with literal collection names
2. Simplified pattern matching logic to reduce code duplication across all
   affected commands
3. Fixed error handling in command_volume_tier_move.go to properly propagate
   filepath.Match errors instead of swallowing them
4. Updated documentation to clarify how to match a literal "default"
   collection using regex patterns like "^default$"

This addresses all feedback from PR review comments.

* Remove unnecessary documentation about matching literal 'default'

Since we changed the keyword to '_default', users can now simply use
'default' to match a literal collection named "default". The previous
documentation about using regex patterns was confusing and no longer needed.

* Fix error propagation and empty pattern handling

1. command_volume_tier_move.go: Added early termination check after
   eachDataNode callback to stop processing remaining nodes if a pattern
   matching error occurred, improving efficiency

2. command_volume_configure_replication.go: Fixed empty pattern handling
   to match all collections (collectionMatched = true when pattern is empty),
   mirroring the behavior in other commands

These changes address the remaining PR review feedback.
2026-01-16 12:31:48 -08:00

83 lines
2.1 KiB
Go

package shell
import (
"errors"
"fmt"
"sync"
)
// DefaultMaxParallelization is the concurrency limit used by default for
// commands that support running work in parallel.
var DefaultMaxParallelization = 10

// CollectionDefault is the reserved keyword that matches empty collection
// names. It is spelled "_default" so that it cannot collide with a collection
// literally named "default".
var CollectionDefault = "_default"
// ErrorWaitGroup implements a goroutine wait group which aggregates errors, if any.
//
// Tasks queued with Add run either inline (when the concurrency limit is one
// or less) or as goroutines, with at most maxConcurrency of them executing
// their task at the same time. Wait blocks until the queued tasks are done and
// returns every recorded error joined into one.
//
// Add, Wait and Reset are meant to be driven from a single controlling
// goroutine; AddErrorf may additionally be called from inside tasks.
type ErrorWaitGroup struct {
	// maxConcurrency bounds how many tasks execute at once; values <= 1 make
	// Add run tasks synchronously.
	maxConcurrency int
	// wg tracks the goroutines started since creation or the last Reset.
	wg *sync.WaitGroup
	// wgSem is a counting semaphore with maxConcurrency slots.
	wgSem chan bool
	// errors holds task results and AddErrorf entries; guarded by errorsMu.
	errors   []error
	errorsMu sync.Mutex
}

// ErrorWaitGroupTask is a unit of work queued on an ErrorWaitGroup. Its return
// value is recorded and reported by Wait.
type ErrorWaitGroupTask func() error

// NewErrorWaitGroup returns an ErrorWaitGroup that executes at most
// maxConcurrency tasks simultaneously. Values of zero or less are treated as
// one, which makes tasks run sequentially.
func NewErrorWaitGroup(maxConcurrency int) *ErrorWaitGroup {
	if maxConcurrency <= 0 {
		// no concurrency = one task at a time
		maxConcurrency = 1
	}
	return &ErrorWaitGroup{
		maxConcurrency: maxConcurrency,
		wg:             &sync.WaitGroup{},
		wgSem:          make(chan bool, maxConcurrency),
	}
}

// Reset restarts an ErrorWaitGroup, keeping original settings. Recorded errors
// are dropped. Goroutines queued before the reset are not waited for by later
// Wait calls and their results are discarded; they still run to completion
// against the wait group and semaphore they were started with.
func (ewg *ErrorWaitGroup) Reset() {
	ewg.errorsMu.Lock()
	defer ewg.errorsMu.Unlock()

	// The previous semaphore is deliberately NOT closed: goroutines still
	// queued on it would panic with "send on closed channel". It is simply
	// abandoned and collected once those goroutines have finished.
	ewg.wg = &sync.WaitGroup{}
	ewg.wgSem = make(chan bool, ewg.maxConcurrency)
	ewg.errors = nil
}

// Add queues an ErrorWaitGroupTask to be executed as a goroutine. When
// parallelization is off (concurrency limit of one or less), f is run
// synchronously before Add returns.
func (ewg *ErrorWaitGroup) Add(f ErrorWaitGroupTask) {
	if ewg.maxConcurrency <= 1 {
		// keep run order deterministic when parallelization is off
		//
		// f is run to completion before the error list is touched, so that
		// entries f itself records through AddErrorf are not overwritten.
		err := f()
		ewg.errorsMu.Lock()
		ewg.errors = append(ewg.errors, err)
		ewg.errorsMu.Unlock()
		return
	}

	// Bind the goroutine to the wait group and semaphore that are current
	// right now, so a later Reset cannot make it release a slot it never
	// acquired or call Done on a wait group it was never added to.
	wg, sem := ewg.wg, ewg.wgSem
	wg.Add(1)
	go func() {
		defer wg.Done()

		sem <- true
		defer func() { <-sem }()

		err := f()

		ewg.errorsMu.Lock()
		// Reset installs a fresh wait group; a mismatch means this task
		// belongs to a flushed generation and its result must be dropped.
		if ewg.wg == wg {
			ewg.errors = append(ewg.errors, err)
		}
		ewg.errorsMu.Unlock()
	}()
}

// AddErrorf adds an error to an ErrorWaitGroupTask result, without queueing any goroutines.
// The error is built with fmt.Errorf from format and a.
func (ewg *ErrorWaitGroup) AddErrorf(format string, a ...any) {
	ewg.errorsMu.Lock()
	ewg.errors = append(ewg.errors, fmt.Errorf(format, a...))
	ewg.errorsMu.Unlock()
}

// Wait sleeps until all ErrorWaitGroupTasks are completed, then returns errors for them.
// The result is built with errors.Join, so it is nil when no non-nil error
// was recorded.
func (ewg *ErrorWaitGroup) Wait() error {
	// A zero-value ErrorWaitGroup has no wait group; it only ever runs tasks
	// inline, so there is nothing to wait for.
	if ewg.wg != nil {
		ewg.wg.Wait()
	}

	ewg.errorsMu.Lock()
	defer ewg.errorsMu.Unlock()
	return errors.Join(ewg.errors...)
}