Files
seaweedFS/weed/command/filer_backup.go
Chris Lu 1261e93ef2 fix: comprehensive go vet error fixes and add CI enforcement (#7861)
* fix: use keyed fields in struct literals

- Replace unsafe reflect.StringHeader/SliceHeader with safe unsafe.String/Slice (weed/query/sqltypes/unsafe.go)
- Add field names to Type_ScalarType struct literals (weed/mq/schema/schema_builder.go)
- Add Duration field name to FlexibleDuration struct literals across test files
- Add field names to bson.D struct literals (weed/filer/mongodb/mongodb_store_kv.go)

Fixes go vet warnings about unkeyed struct literals.

* fix: remove unreachable code

- Remove unreachable return statements after infinite for loops
- Remove unreachable code after if/else blocks where all paths return
- Simplify recursive logic by removing unnecessary for loop (inode_to_path.go)
- Fix Type_ScalarType literal to use enum value directly (schema_builder.go)
- Call onCompletionFn on stream error (subscribe_session.go)

Files fixed:
- weed/query/sqltypes/unsafe.go
- weed/mq/schema/schema_builder.go
- weed/mq/client/sub_client/connect_to_sub_coordinator.go
- weed/filer/redis3/ItemList.go
- weed/mq/client/agent_client/subscribe_session.go
- weed/mq/broker/broker_grpc_pub_balancer.go
- weed/mount/inode_to_path.go
- weed/util/skiplist/name_list.go

* fix: avoid copying lock values in protobuf messages

- Use proto.Merge() instead of direct assignment to avoid copying sync.Mutex in S3ApiConfiguration (iamapi_server.go)
- Add explicit comments noting that channel-received values are already copies before taking addresses (volume_grpc_client_to_master.go)

The protobuf messages contain sync.Mutex fields from the message state, which should not be copied.
Using proto.Merge() properly merges messages without copying the embedded mutex.

* fix: correct byte array size for uint32 bit shift operations

The generateAccountId() function only needs 4 bytes to create a uint32 value.
Changed from allocating 8 bytes to 4 bytes to match the actual usage.

This fixes go vet warning about shifting 8-bit values (bytes) by more than 8 bits.

* fix: ensure context cancellation on all error paths

In broker_client_subscribe.go, ensure subscriberCancel() is called on all error return paths:
- When stream creation fails
- When partition assignment fails
- When sending initialization message fails

This prevents context leaks when an error occurs during subscriber creation.

* fix: ensure subscriberCancel called for CreateFreshSubscriber stream.Send error

Ensure subscriberCancel() is called when stream.Send fails in CreateFreshSubscriber.

* ci: add go vet step to prevent future lint regressions

- Add go vet step to GitHub Actions workflow
- Filter known protobuf lock warnings (MessageState sync.Mutex)
  These are expected in generated protobuf code and are safe
- Prevents accumulation of go vet errors in future PRs
- Step runs before build to catch issues early

* fix: resolve remaining syntax and logic errors in vet fixes

- Fixed syntax errors in filer_sync.go caused by missing closing braces
- Added missing closing brace for if block and function
- Synchronized fixes to match previous commits on branch

* fix: add missing return statements to daemon functions

- Add 'return false' after infinite loops in filer_backup.go and filer_meta_backup.go
- Satisfies declared bool return type signatures
- Maintains consistency with other daemon functions (runMaster, runFilerSynchronize, runWorker)
- While unreachable, explicitly declares the return satisfies function signature contract

* fix: add nil check for onCompletionFn in SubscribeMessageRecord

- Check if onCompletionFn is not nil before calling it
- Prevents potential panic if nil function is passed
- Matches pattern used in other callback functions

* docs: clarify unreachable return statements in daemon functions

- Add comments documenting that return statements satisfy function signature
- Explains that these returns follow infinite loops and are unreachable
- Improves code clarity for future maintainers
2025-12-23 14:48:50 -08:00

202 lines
7.5 KiB
Go

package command
import (
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/replication/source"
"github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/http"
"google.golang.org/grpc"
)
type FilerBackupOptions struct {
isActivePassive *bool
filer *string
path *string
excludePaths *string
excludeFileName *string
debug *bool
proxyByFiler *bool
doDeleteFiles *bool
disableErrorRetry *bool
ignore404Error *bool
timeAgo *time.Duration
retentionDays *int
}
var (
filerBackupOptions FilerBackupOptions
)
func init() {
cmdFilerBackup.Run = runFilerBackup // break init cycle
filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "exclude file names that match the regexp to sync on filer")
filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
filerBackupOptions.timeAgo = cmdFilerBackup.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
filerBackupOptions.retentionDays = cmdFilerBackup.Flag.Int("retentionDays", 0, "incremental backup retention days")
filerBackupOptions.disableErrorRetry = cmdFilerBackup.Flag.Bool("disableErrorRetry", false, "disables errors retry, only logs will print")
filerBackupOptions.ignore404Error = cmdFilerBackup.Flag.Bool("ignore404Error", true, "ignore 404 errors from filer")
}
var cmdFilerBackup = &Command{
UsageLine: "filer.backup -filer=<filerHost>:<filerPort> ",
Short: "resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml",
Long: `resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml
filer.backup listens on filer notifications. If any file is updated, it will fetch the updated content,
and write to the destination. This is to replace filer.replicate command since additional message queue is not needed.
If restarted and "-timeAgo" is not set, the synchronization will resume from the previous checkpoints, persisted every minute.
A fresh sync will start from the earliest metadata logs. To reset the checkpoints, just set "-timeAgo" to a high value.
`,
}
func runFilerBackup(cmd *Command, args []string) bool {
util.LoadSecurityConfiguration()
util.LoadConfiguration("replication", true)
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
clientId := util.RandomInt32()
var clientEpoch int32
for {
clientEpoch++
err := doFilerBackup(grpcDialOption, &filerBackupOptions, clientId, clientEpoch)
if err != nil {
glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
time.Sleep(1747 * time.Millisecond)
}
}
// Unreachable: satisfies bool return type signature for daemon function
return false
}
const (
BackupKeyPrefix = "backup."
)
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error {
// find data sink
dataSink := findSink(util.GetViper())
if dataSink == nil {
return fmt.Errorf("no data sink configured in replication.toml")
}
sourceFiler := pb.ServerAddress(*backupOption.filer)
sourcePath := *backupOption.path
excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
var reExcludeFileName *regexp.Regexp
if *backupOption.excludeFileName != "" {
var err error
if reExcludeFileName, err = regexp.Compile(*backupOption.excludeFileName); err != nil {
return fmt.Errorf("error compile regexp %v for exclude file name: %+v", *backupOption.excludeFileName, err)
}
}
timeAgo := *backupOption.timeAgo
targetPath := dataSink.GetSinkToDirectory()
debug := *backupOption.debug
// get start time for the data sink
startFrom := time.Unix(0, 0)
sinkId := util.HashStringToLong(dataSink.GetName() + dataSink.GetSinkToDirectory())
if timeAgo.Milliseconds() == 0 {
lastOffsetTsNs, err := getOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId))
if err != nil {
glog.V(0).Infof("starting from %v", startFrom)
} else {
startFrom = time.Unix(0, lastOffsetTsNs)
glog.V(0).Infof("resuming from %v", startFrom)
}
} else {
startFrom = time.Now().Add(-timeAgo)
glog.V(0).Infof("start time is set to %v", startFrom)
}
// create filer sink
filerSource := &source.FilerSource{}
filerSource.DoInitialize(
sourceFiler.ToHttpAddress(),
sourceFiler.ToGrpcAddress(),
sourcePath,
*backupOption.proxyByFiler)
dataSink.SetSourceFiler(filerSource)
var processEventFn func(*filer_pb.SubscribeMetadataResponse) error
if *backupOption.ignore404Error {
processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug)
processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error {
err := processEventFnGenerated(resp)
if err == nil {
return nil
}
if errors.Is(err, http.ErrNotFound) {
glog.V(0).Infof("got 404 error, ignore it: %s", err.Error())
return nil
}
return err
}
} else {
processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug)
}
processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {
glog.V(0).Infof("backup %s progressed to %v %0.2f/sec", sourceFiler, time.Unix(0, lastTsNs), float64(counter)/float64(3))
return setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId), lastTsNs)
})
if dataSink.IsIncremental() && *filerBackupOptions.retentionDays > 0 {
go func() {
for {
now := time.Now()
time.Sleep(time.Hour * 24)
key := util.Join(targetPath, now.Add(-1*time.Hour*24*time.Duration(*filerBackupOptions.retentionDays)).Format("2006-01-02"))
_ = dataSink.DeleteEntry(util.Join(targetPath, key), true, true, nil)
glog.V(0).Infof("incremental backup delete directory:%s", key)
}
}()
}
prefix := sourcePath
if !strings.HasSuffix(prefix, "/") {
prefix = prefix + "/"
}
eventErrorType := pb.RetryForeverOnError
if *backupOption.disableErrorRetry {
eventErrorType = pb.TrivialOnError
}
metadataFollowOption := &pb.MetadataFollowOption{
ClientName: "backup_" + dataSink.GetName(),
ClientId: clientId,
ClientEpoch: clientEpoch,
SelfSignature: 0,
PathPrefix: prefix,
AdditionalPathPrefixes: nil,
DirectoriesToWatch: nil,
StartTsNs: startFrom.UnixNano(),
StopTsNs: 0,
EventErrorType: eventErrorType,
}
return pb.FollowMetadata(sourceFiler, grpcDialOption, metadataFollowOption, processEventFnWithOffset)
}