Files
seaweedFS/weed/mq/broker/broker_server.go
Chris Lu 995dfc4d5d chore: remove ~50k lines of unreachable dead code (#8913)
* chore: remove unreachable dead code across the codebase

Remove ~50,000 lines of unreachable code identified by static analysis.

Major removals:
- weed/filer/redis_lua: entire unused Redis Lua filer store implementation
- weed/wdclient/net2, resource_pool: unused connection/resource pool packages
- weed/plugin/worker/lifecycle: unused lifecycle plugin worker
- weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy,
  multipart IAM, key rotation, and various SSE helper functions
- weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions
- weed/mq/offset: unused SQL storage and migration code
- weed/worker: unused registry, task, and monitoring functions
- weed/query: unused SQL engine, parquet scanner, and type functions
- weed/shell: unused EC proportional rebalance functions
- weed/storage/erasure_coding/distribution: unused distribution analysis functions
- Individual unreachable functions removed from 150+ files across admin,
  credential, filer, iam, kms, mount, mq, operation, pb, s3api, server,
  shell, storage, topology, and util packages

* fix(s3): reset shared memory store in IAM test to prevent flaky failure

TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because
the MemoryStore credential backend is a singleton registered via init().
Earlier tests that create anonymous identities pollute the shared store,
causing LookupAnonymous() to unexpectedly return true.

Fix by calling Reset() on the memory store before the test runs.

* style: run gofmt on changed files

* fix: restore KMS functions used by integration tests

* fix(plugin): prevent panic on send to closed worker session channel

The Plugin.sendToWorker method could panic with "send on closed channel"
when a worker disconnected while a message was being sent. The race was
between streamSession.close() closing the outgoing channel and sendToWorker
writing to it concurrently.

Add a done channel to streamSession that is closed before the outgoing
channel, and check it in sendToWorker's select to safely detect closed
sessions without panicking.
2026-04-03 16:04:27 -07:00

213 lines
7.7 KiB
Go

package broker
import (
"context"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer_client"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mq/pub_balancer"
"github.com/seaweedfs/seaweedfs/weed/mq/sub_coordinator"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
"google.golang.org/grpc"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
)
// MessageQueueBrokerOption carries the startup configuration for a message
// queue broker. It is read once at construction time (see NewMessageBroker).
type MessageQueueBrokerOption struct {
	Masters            map[string]pb.ServerAddress // master servers, keyed by address string
	FilerGroup         string                      // filer group this broker registers under
	DataCenter         string                      // data center label reported to the master
	Rack               string                      // rack label reported to the master
	DefaultReplication string
	MaxMB              int
	Ip                 string // IP address this broker advertises
	Port               int    // port this broker advertises
	Cipher             bool
	VolumeServerAccess string // how to access volume servers
	LogFlushInterval   int    // log buffer flush interval in seconds
}
// BrokerAddress builds the advertised server address of this broker from the
// configured IP and port.
func (o *MessageQueueBrokerOption) BrokerAddress() pb.ServerAddress {
	addr := pb.NewServerAddress(o.Ip, o.Port, 0)
	return addr
}
// topicCacheEntry caches both topic existence and configuration.
// If conf is nil, the topic doesn't exist (negative cache).
// If conf is non-nil, the topic exists with this configuration (positive cache).
type topicCacheEntry struct {
	conf      *mq_pb.ConfigureTopicResponse // nil = topic doesn't exist
	expiresAt time.Time                     // entry is stale once this instant has passed
}
// MessageQueueBroker implements the SeaweedMessaging gRPC service. It tracks
// cluster membership (filers discovered via master peer updates), manages
// locally hosted topic partitions, and caches topic configuration read from
// the filer.
type MessageQueueBroker struct {
	mq_pb.UnimplementedSeaweedMessagingServer
	option            *MessageQueueBrokerOption     // startup configuration
	grpcDialOption    grpc.DialOption               // dial option for all outgoing gRPC connections
	MasterClient      *wdclient.MasterClient        // maintains the connection to the master cluster
	filers            map[pb.ServerAddress]struct{} // known filers, updated by OnBrokerUpdate
	currentFiler      pb.ServerAddress              // filer currently used for filer operations ("" until one is discovered)
	localTopicManager *topic.LocalTopicManager      // topic partitions hosted by this broker
	PubBalancer       *pub_balancer.PubBalancer     // publish balancer (see pub_balancer package)
	lockAsBalancer    *cluster.LiveLock             // long-lived lock on the broker-balancer role
	// TODO: Add native offset management to broker
	// ASSUMPTION: BrokerOffsetManager handles all partition offset assignment
	offsetManager  *BrokerOffsetManager
	SubCoordinator *sub_coordinator.SubCoordinator // subscriber coordination (see sub_coordinator package)
	// Removed gatewayRegistry - no longer needed
	// NOTE(review): accessLock is not visibly acquired anywhere in this file;
	// confirm it guards filers/currentFiler elsewhere, or that it is dead.
	accessLock sync.Mutex
	fca        *filer_client.FilerClientAccessor // adapts the single currentFiler to the multi-filer accessor interface
	// Unified topic cache for both existence and configuration
	// Caches topic config (positive: conf != nil) and non-existence (negative: conf == nil)
	// Eliminates 60% CPU overhead from repeated filer reads and JSON unmarshaling
	topicCache    map[string]*topicCacheEntry
	topicCacheMu  sync.RWMutex  // guards topicCache
	topicCacheTTL time.Duration // lifetime of a topicCache entry
}
// NewMessageBroker constructs a MessageQueueBroker: it wires up the publish
// balancer, subscriber coordinator, filer accessor, and offset manager,
// starts the master connection and the idle-partition cleanup task, and
// launches a background goroutine that follows the broker-balancer lock.
//
// Fix: the balancer-lock callback previously logged the misspelled word
// "balanacer"; the message now reads "balancer".
func NewMessageBroker(option *MessageQueueBrokerOption, grpcDialOption grpc.DialOption) (mqBroker *MessageQueueBroker, err error) {
	pubBalancer := pub_balancer.NewPubBalancer()
	subCoordinator := sub_coordinator.NewSubCoordinator()

	mqBroker = &MessageQueueBroker{
		option:            option,
		grpcDialOption:    grpcDialOption,
		MasterClient:      wdclient.NewMasterClient(grpcDialOption, option.FilerGroup, cluster.BrokerType, option.BrokerAddress(), option.DataCenter, option.Rack, *pb.NewServiceDiscoveryFromMap(option.Masters)),
		filers:            make(map[pb.ServerAddress]struct{}),
		localTopicManager: topic.NewLocalTopicManager(),
		PubBalancer:       pubBalancer,
		SubCoordinator:    subCoordinator,
		offsetManager:     nil, // Will be initialized below
		topicCache:        make(map[string]*topicCacheEntry),
		topicCacheTTL:     30 * time.Second, // Unified cache for existence + config (eliminates 60% CPU overhead)
	}

	// Create FilerClientAccessor that adapts broker's single filer to the new multi-filer interface
	fca := &filer_client.FilerClientAccessor{
		GetGrpcDialOption: mqBroker.GetGrpcDialOption,
		GetFilers: func() []pb.ServerAddress {
			filer := mqBroker.GetFiler()
			if filer != "" {
				return []pb.ServerAddress{filer}
			}
			return []pb.ServerAddress{}
		},
	}
	mqBroker.fca = fca
	subCoordinator.FilerClientAccessor = fca

	mqBroker.MasterClient.SetOnPeerUpdateFn(mqBroker.OnBrokerUpdate)
	pubBalancer.OnPartitionChange = mqBroker.SubCoordinator.OnPartitionChange
	go mqBroker.MasterClient.KeepConnectedToMaster(context.Background())

	// Initialize offset manager using the filer accessor
	// The filer accessor will automatically use the current filer address as it gets discovered
	// No hardcoded namespace/topic - offset storage now derives paths from actual topic information
	mqBroker.offsetManager = NewBrokerOffsetManagerWithFilerAccessor(fca)
	glog.V(0).Infof("broker initialized offset manager with filer accessor (current filer: %s)", mqBroker.GetFiler())

	// Start idle partition cleanup task
	// Cleans up partitions with no publishers/subscribers after 5 minutes of idle time
	// Checks every 1 minute to avoid memory bloat from short-lived topics
	mqBroker.localTopicManager.StartIdlePartitionCleanup(
		context.Background(),
		1*time.Minute, // Check interval
		5*time.Minute, // Idle timeout - clean up after 5 minutes of no activity
	)
	glog.V(0).Info("Started idle partition cleanup task (check: 1m, timeout: 5m)")

	existingNodes := cluster.ListExistingPeerUpdates(mqBroker.MasterClient.GetMaster(context.Background()), grpcDialOption, option.FilerGroup, cluster.FilerType)
	for _, newNode := range existingNodes {
		mqBroker.OnBrokerUpdate(newNode, time.Now())
	}

	// keep connecting to balancer
	go func() {
		// NOTE(review): this polls mqBroker.currentFiler while OnBrokerUpdate
		// writes it from the master-client callback goroutine without visible
		// synchronization — looks like a data race; confirm and consider
		// guarding with accessLock or signaling via a channel.
		for mqBroker.currentFiler == "" {
			time.Sleep(time.Millisecond * 237)
		}
		self := option.BrokerAddress()
		glog.V(0).Infof("broker %s found filer %s", self, mqBroker.currentFiler)

		newBrokerBalancerCh := make(chan string, 1)
		lockClient := cluster.NewLockClient(grpcDialOption, mqBroker.currentFiler)
		mqBroker.lockAsBalancer = lockClient.StartLongLivedLock(pub_balancer.LockBrokerBalancer, string(self), func(newLockOwner string) {
			glog.V(0).Infof("broker %s found balancer %s", self, newLockOwner)
			newBrokerBalancerCh <- newLockOwner
		}, lock_manager.LiveLockTTL)
		mqBroker.KeepConnectedToBrokerBalancer(newBrokerBalancerCh)
	}()

	return mqBroker, nil
}
// OnBrokerUpdate reacts to filer membership changes reported by the master.
// Updates for non-filer node types are ignored. On an add, the filer is
// recorded and becomes the current filer if none is set yet; on a removal,
// the filer is forgotten and, if it was the current one, an arbitrary
// surviving filer takes its place (if any remain).
func (b *MessageQueueBroker) OnBrokerUpdate(update *master_pb.ClusterNodeUpdate, startFrom time.Time) {
	if update.NodeType != cluster.FilerType {
		return
	}
	address := pb.ServerAddress(update.Address)

	if !update.IsAdd {
		delete(b.filers, address)
		if b.currentFiler != address {
			return
		}
		// The current filer went away: promote any remaining filer.
		for candidate := range b.filers {
			b.currentFiler = candidate
			// The offset manager will automatically use the new filer through the filer accessor
			glog.V(0).Infof("broker switched to filer %s (offset manager will automatically use it)", candidate)
			return
		}
		return
	}

	b.filers[address] = struct{}{}
	if b.currentFiler == "" {
		b.currentFiler = address
		// The offset manager will automatically use the updated filer through the filer accessor
		glog.V(0).Infof("broker discovered filer %s (offset manager will automatically use it via filer accessor)", address)
	}
}
// GetGrpcDialOption exposes the dial option this broker uses for outgoing
// gRPC connections.
func (b *MessageQueueBroker) GetGrpcDialOption() grpc.DialOption {
	opt := b.grpcDialOption
	return opt
}
// GetFiler reports the filer currently selected for filer operations; the
// result is the empty address until a filer has been discovered.
func (b *MessageQueueBroker) GetFiler() pb.ServerAddress {
	filer := b.currentFiler
	return filer
}
// WithFilerClient runs fn against the broker's currently selected filer,
// using the broker's shared gRPC dial option.
func (b *MessageQueueBroker) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
	filerAddress := b.GetFiler()
	return pb.WithFilerClient(streamingMode, 0, filerAddress, b.grpcDialOption, fn)
}
// AdjustedUrl returns the volume location URL unchanged; the broker performs
// no address rewriting.
func (b *MessageQueueBroker) AdjustedUrl(location *filer_pb.Location) string {
	url := location.Url
	return url
}
// GetDataCenter always returns the empty string: the broker does not express
// a data center preference for filer access.
func (b *MessageQueueBroker) GetDataCenter() string {
	var dc string
	return dc
}
// withBrokerClient dials the given broker address and invokes fn with a
// messaging client, returning fn's error (or the dial error).
// The previous version wrapped fn in a closure that only forwarded the call;
// fn is now passed directly, which is equivalent and avoids the extra
// allocation.
func (b *MessageQueueBroker) withBrokerClient(streamingMode bool, server pb.ServerAddress, fn func(client mq_pb.SeaweedMessagingClient) error) error {
	return pb.WithBrokerGrpcClient(streamingMode, server.String(), b.grpcDialOption, fn)
}