Add message queue agent (#6463)

* scaffold message queue agent

* adjust proto, add mq_agent

* add agent client implementation

* remove unused function

* agent publish server implementation

* adding agent
This commit is contained in:
Chris Lu
2025-01-20 22:19:27 -08:00
committed by GitHub
parent b2f56d9add
commit cc05874d06
57 changed files with 3802 additions and 1562 deletions

View File

@@ -0,0 +1,63 @@
package agent_client
import (
"context"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_agent_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"google.golang.org/grpc"
)
type SubscribeOption struct {
ConsumerGroup string
ConsumerGroupInstanceId string
Topic topic.Topic
Filter string
MaxSubscribedPartitions int32
PerPartitionConcurrency int32
}
type SubscribeSession struct {
Option *SubscribeOption
stream grpc.BidiStreamingClient[mq_agent_pb.SubscribeRecordRequest, mq_agent_pb.SubscribeRecordResponse]
sessionId int64
}
func NewSubscribeSession(agentAddress string, option *SubscribeOption) (*SubscribeSession, error) {
// call local agent grpc server to create a new session
clientConn, err := pb.GrpcDial(context.Background(), agentAddress, true, grpc.WithInsecure())
if err != nil {
return nil, fmt.Errorf("dial agent server %s: %v", agentAddress, err)
}
agentClient := mq_agent_pb.NewSeaweedMessagingAgentClient(clientConn)
resp, err := agentClient.StartSubscribeSession(context.Background(), &mq_agent_pb.StartSubscribeSessionRequest{
ConsumerGroup: option.ConsumerGroup,
ConsumerGroupInstanceId: option.ConsumerGroupInstanceId,
Topic: &schema_pb.Topic{
Namespace: option.Topic.Namespace,
Name: option.Topic.Name,
},
MaxSubscribedPartitions: option.MaxSubscribedPartitions,
Filter: option.Filter,
})
if err != nil {
return nil, err
}
if resp.Error != "" {
return nil, fmt.Errorf("start subscribe session: %v", resp.Error)
}
stream, err := agentClient.SubscribeRecord(context.Background())
if err != nil {
return nil, fmt.Errorf("subscribe record: %v", err)
}
return &SubscribeSession{
Option: option,
stream: stream,
sessionId: resp.SessionId,
}, nil
}

View File

@@ -0,0 +1,14 @@
package agent_client
import (
"github.com/seaweedfs/seaweedfs/weed/pb/mq_agent_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
)
func (a *PublishSession) PublishMessageRecord(key []byte, record *schema_pb.RecordValue) error {
return a.stream.Send(&mq_agent_pb.PublishRecordRequest{
SessionId: a.sessionId,
Key: key,
Value: record,
})
}

View File

@@ -0,0 +1,17 @@
package agent_client
import (
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
)
func (a *SubscribeSession) SubscribeMessageRecord(
onEachMessageFn func(key []byte, record *schema_pb.RecordValue),
onCompletionFn func()) error {
for {
resp, err := a.stream.Recv()
if err != nil {
return err
}
onEachMessageFn(resp.Key, resp.Value)
}
}

View File

@@ -0,0 +1,70 @@
package agent_client
import (
"context"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/mq/schema"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_agent_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"google.golang.org/grpc"
)
type PublishSession struct {
schema *schema.Schema
partitionCount int
publisherName string
stream grpc.BidiStreamingClient[mq_agent_pb.PublishRecordRequest, mq_agent_pb.PublishRecordResponse]
sessionId int64
}
func NewPublishSession(agentAddress string, topicSchema *schema.Schema, partitionCount int, publisherName string) (*PublishSession, error) {
// call local agent grpc server to create a new session
clientConn, err := pb.GrpcDial(context.Background(), agentAddress, true, grpc.WithInsecure())
if err != nil {
return nil, fmt.Errorf("dial agent server %s: %v", agentAddress, err)
}
agentClient := mq_agent_pb.NewSeaweedMessagingAgentClient(clientConn)
resp, err := agentClient.StartPublishSession(context.Background(), &mq_agent_pb.StartPublishSessionRequest{
Topic: &schema_pb.Topic{
Namespace: topicSchema.Namespace,
Name: topicSchema.Name,
},
PartitionCount: int32(partitionCount),
RecordType: topicSchema.RecordType,
PublisherName: publisherName,
})
if err != nil {
return nil, err
}
if resp.Error != "" {
return nil, fmt.Errorf("start publish session: %v", resp.Error)
}
stream, err := agentClient.PublishRecord(context.Background())
if err != nil {
return nil, fmt.Errorf("publish record: %v", err)
}
return &PublishSession{
schema: topicSchema,
partitionCount: partitionCount,
publisherName: publisherName,
stream: stream,
sessionId: resp.SessionId,
}, nil
}
func (a *PublishSession) CloseSession() error {
if a.schema == nil {
return nil
}
err := a.stream.CloseSend()
if err != nil {
return fmt.Errorf("close send: %v", err)
}
a.schema = nil
return err
}

View File

@@ -0,0 +1,128 @@
package main
import (
"flag"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/mq/client/agent_client"
"github.com/seaweedfs/seaweedfs/weed/mq/schema"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"log"
"sync"
"sync/atomic"
"time"
)
var (
messageCount = flag.Int("n", 1000, "message count")
messageDelay = flag.Duration("d", time.Second, "delay between messages")
concurrency = flag.Int("c", 4, "concurrent publishers")
partitionCount = flag.Int("p", 6, "partition count")
clientName = flag.String("client", "c1", "client name")
namespace = flag.String("ns", "test", "namespace")
t = flag.String("t", "test", "t")
seedBrokers = flag.String("brokers", "localhost:17777", "seed brokers")
counter int32
)
func doPublish(publisher *agent_client.PublishSession, id int) {
startTime := time.Now()
for {
i := atomic.AddInt32(&counter, 1)
if i > int32(*messageCount) {
break
}
// Simulate publishing a message
myRecord := genMyRecord(int32(i))
if err := publisher.PublishMessageRecord(myRecord.Key, myRecord.ToRecordValue()); err != nil {
fmt.Println(err)
break
}
if *messageDelay > 0 {
time.Sleep(*messageDelay)
fmt.Printf("sent %+v\n", string(myRecord.Key))
}
}
elapsed := time.Since(startTime)
log.Printf("Publisher %s-%d finished in %s", *clientName, id, elapsed)
}
type MyRecord struct {
Key []byte
Field1 []byte
Field2 string
Field3 int32
Field4 int64
Field5 float32
Field6 float64
Field7 bool
}
func genMyRecord(id int32) *MyRecord {
return &MyRecord{
Key: []byte(fmt.Sprintf("key-%s-%d", *clientName, id)),
Field1: []byte(fmt.Sprintf("field1-%s-%d", *clientName, id)),
Field2: fmt.Sprintf("field2-%s-%d", *clientName, id),
Field3: id,
Field4: int64(id),
Field5: float32(id),
Field6: float64(id),
Field7: id%2 == 0,
}
}
func (r *MyRecord) ToRecordValue() *schema_pb.RecordValue {
return schema.RecordBegin().
SetBytes("key", r.Key).
SetBytes("field1", r.Field1).
SetString("field2", r.Field2).
SetInt32("field3", r.Field3).
SetInt64("field4", r.Field4).
SetFloat("field5", r.Field5).
SetDouble("field6", r.Field6).
SetBool("field7", r.Field7).
RecordEnd()
}
func main() {
flag.Parse()
recordType := schema.RecordTypeBegin().
WithField("key", schema.TypeBytes).
WithField("field1", schema.TypeBytes).
WithField("field2", schema.TypeString).
WithField("field3", schema.TypeInt32).
WithField("field4", schema.TypeInt64).
WithField("field5", schema.TypeFloat).
WithField("field6", schema.TypeDouble).
WithField("field7", schema.TypeBoolean).
RecordTypeEnd()
session, err := agent_client.NewPublishSession("localhost:16777", schema.NewSchema(*namespace, *t, recordType), *partitionCount, *clientName)
if err != nil {
log.Printf("failed to create session: %v", err)
return
}
defer session.CloseSession()
startTime := time.Now()
var wg sync.WaitGroup
// Start multiple publishers
for i := 0; i < *concurrency; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
doPublish(session, id)
}(i)
}
// Wait for all publishers to finish
wg.Wait()
elapsed := time.Since(startTime)
log.Printf("Published %d messages in %s (%.2f msg/s)", *messageCount, elapsed, float64(*messageCount)/elapsed.Seconds())
}

View File

@@ -6,12 +6,12 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mq/client/sub_client"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"strings"
"time"
)
var (
@@ -33,23 +33,24 @@ func main() {
ConsumerGroupInstanceId: fmt.Sprintf("client-%d", *clientId),
GrpcDialOption: grpc.WithTransportCredentials(insecure.NewCredentials()),
MaxPartitionCount: int32(*maxPartitionCount),
PerPartitionConcurrency: int32(*perPartitionConcurrency),
SlidingWindowSize: int32(*perPartitionConcurrency),
}
contentConfig := &sub_client.ContentConfiguration{
Topic: topic.NewTopic(*namespace, *t),
Filter: "",
StartTime: time.Unix(1, 1),
Topic: topic.NewTopic(*namespace, *t),
Filter: "",
}
brokers := strings.Split(*seedBrokers, ",")
subscriber := sub_client.NewTopicSubscriber(brokers, subscriberConfig, contentConfig)
subscriber := sub_client.NewTopicSubscriber(brokers, subscriberConfig, contentConfig, make(chan sub_client.KeyedOffset, 1024))
counter := 0
subscriber.SetEachMessageFunc(func(key, value []byte) error {
counter++
println(string(key), "=>", string(value), counter)
return nil
executors := util.NewLimitedConcurrentExecutor(int(subscriberConfig.SlidingWindowSize))
subscriber.SetOnDataMessageFn(func(m *mq_pb.SubscribeMessageResponse_Data) {
executors.Execute(func() {
counter++
println(string(m.Data.Key), "=>", string(m.Data.Value), counter)
})
})
subscriber.SetCompletionFunc(func() {

View File

@@ -6,6 +6,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mq/client/sub_client"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
@@ -60,30 +61,31 @@ func main() {
ConsumerGroupInstanceId: fmt.Sprintf("client-%d", *clientId),
GrpcDialOption: grpc.WithTransportCredentials(insecure.NewCredentials()),
MaxPartitionCount: int32(*maxPartitionCount),
PerPartitionConcurrency: int32(*perPartitionConcurrency),
SlidingWindowSize: int32(*perPartitionConcurrency),
}
contentConfig := &sub_client.ContentConfiguration{
Topic: topic.NewTopic(*namespace, *t),
Filter: "",
StartTime: time.Now().Add(-*timeAgo),
Topic: topic.NewTopic(*namespace, *t),
Filter: "",
// StartTime: time.Now().Add(-*timeAgo),
}
brokers := strings.Split(*seedBrokers, ",")
subscriber := sub_client.NewTopicSubscriber(brokers, subscriberConfig, contentConfig)
subscriber := sub_client.NewTopicSubscriber(brokers, subscriberConfig, contentConfig, make(chan sub_client.KeyedOffset, 1024))
counter := 0
subscriber.SetEachMessageFunc(func(key, value []byte) error {
counter++
record := &schema_pb.RecordValue{}
err := proto.Unmarshal(value, record)
if err != nil {
fmt.Printf("unmarshal record value: %v\n", err)
} else {
fmt.Printf("%s %d: %v\n", string(key), len(value), record)
}
//time.Sleep(1300 * time.Millisecond)
return nil
executors := util.NewLimitedConcurrentExecutor(int(subscriberConfig.SlidingWindowSize))
subscriber.SetOnDataMessageFn(func(m *mq_pb.SubscribeMessageResponse_Data) {
executors.Execute(func() {
counter++
record := &schema_pb.RecordValue{}
err := proto.Unmarshal(m.Data.Value, record)
if err != nil {
fmt.Printf("unmarshal record value: %v\n", err)
} else {
fmt.Printf("%s %d: %v\n", string(m.Data.Key), len(m.Data.Value), record)
}
})
})
subscriber.SetCompletionFunc(func() {

View File

@@ -6,12 +6,18 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"io"
"reflect"
"time"
)
func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssignment, stopCh chan struct{}) error {
type KeyedOffset struct {
Key []byte
Offset int64
}
func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssignment, stopCh chan struct{}, onDataMessageFn OnDataMessageFn) error {
// connect to the partition broker
return pb.WithBrokerGrpcClient(true, assigned.LeaderBroker, sub.SubscriberConfig.GrpcDialOption, func(client mq_pb.SeaweedMessagingClient) error {
@@ -20,31 +26,30 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
return fmt.Errorf("create subscribe client: %v", err)
}
perPartitionConcurrency := sub.SubscriberConfig.PerPartitionConcurrency
if perPartitionConcurrency <= 0 {
perPartitionConcurrency = 1
slidingWindowSize := sub.SubscriberConfig.SlidingWindowSize
if slidingWindowSize <= 0 {
slidingWindowSize = 1
}
var stopTsNs int64
if !sub.ContentConfig.StopTime.IsZero() {
stopTsNs = sub.ContentConfig.StopTime.UnixNano()
po := findPartitionOffset(sub.ContentConfig.PartitionOffsets, assigned.Partition)
if po == nil {
po = &schema_pb.PartitionOffset{
Partition: assigned.Partition,
StartTsNs: time.Now().UnixNano(),
StartType: schema_pb.PartitionOffsetStartType_EARLIEST_IN_MEMORY,
}
}
if err = subscribeClient.Send(&mq_pb.SubscribeMessageRequest{
Message: &mq_pb.SubscribeMessageRequest_Init{
Init: &mq_pb.SubscribeMessageRequest_InitMessage{
ConsumerGroup: sub.SubscriberConfig.ConsumerGroup,
ConsumerId: sub.SubscriberConfig.ConsumerGroupInstanceId,
Topic: sub.ContentConfig.Topic.ToPbTopic(),
PartitionOffset: &mq_pb.PartitionOffset{
Partition: assigned.Partition,
StartTsNs: sub.ContentConfig.StartTime.UnixNano(),
StopTsNs: stopTsNs,
StartType: mq_pb.PartitionOffsetStartType_EARLIEST_IN_MEMORY,
},
Filter: sub.ContentConfig.Filter,
FollowerBroker: assigned.FollowerBroker,
Concurrency: perPartitionConcurrency,
ConsumerGroup: sub.SubscriberConfig.ConsumerGroup,
ConsumerId: sub.SubscriberConfig.ConsumerGroupInstanceId,
Topic: sub.ContentConfig.Topic.ToPbTopic(),
PartitionOffset: po,
Filter: sub.ContentConfig.Filter,
FollowerBroker: assigned.FollowerBroker,
SlidingWindowSize: slidingWindowSize,
},
},
}); err != nil {
@@ -57,24 +62,13 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
defer sub.OnCompletionFunc()
}
type KeyedOffset struct {
Key []byte
Offset int64
}
partitionOffsetChan := make(chan KeyedOffset, 1024)
defer func() {
close(partitionOffsetChan)
}()
executors := util.NewLimitedConcurrentExecutor(int(perPartitionConcurrency))
go func() {
for {
select {
case <-stopCh:
subscribeClient.CloseSend()
return
case ack, ok := <-partitionOffsetChan:
case ack, ok := <-sub.PartitionOffsetChan:
if !ok {
subscribeClient.CloseSend()
return
@@ -91,9 +85,7 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
}
}()
var lastErr error
for lastErr == nil {
for {
// glog.V(0).Infof("subscriber %s/%s/%s waiting for message", sub.ContentConfig.Namespace, sub.ContentConfig.Topic, sub.SubscriberConfig.ConsumerGroup)
resp, err := subscribeClient.Recv()
if err != nil {
@@ -113,17 +105,7 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
fmt.Printf("empty key %+v, type %v\n", m, reflect.TypeOf(m))
continue
}
executors.Execute(func() {
processErr := sub.OnEachMessageFunc(m.Data.Key, m.Data.Value)
if processErr == nil {
partitionOffsetChan <- KeyedOffset{
Key: m.Data.Key,
Offset: m.Data.TsNs,
}
} else {
lastErr = processErr
}
})
onDataMessageFn(m)
case *mq_pb.SubscribeMessageResponse_Ctrl:
// glog.V(0).Infof("subscriber %s/%s/%s received control %+v", sub.ContentConfig.Namespace, sub.ContentConfig.Topic, sub.SubscriberConfig.ConsumerGroup, m.Ctrl)
if m.Ctrl.IsEndOfStream || m.Ctrl.IsEndOfTopic {
@@ -132,6 +114,14 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
}
}
return lastErr
})
}
func findPartitionOffset(partitionOffsets []*schema_pb.PartitionOffset, partition *schema_pb.Partition) *schema_pb.PartitionOffset {
for _, po := range partitionOffsets {
if po.Partition == partition {
return po
}
}
return nil
}

View File

@@ -4,6 +4,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"sync"
"time"
)
@@ -20,6 +21,7 @@ func (sub *TopicSubscriber) Subscribe() error {
go sub.startProcessors()
// loop forever
// TODO shutdown the subscriber when not needed anymore
sub.doKeepConnectedToSubCoordinator()
return nil
@@ -66,7 +68,21 @@ func (sub *TopicSubscriber) startProcessors() {
},
},
}
err := sub.onEachPartition(assigned, stopChan)
executors := util.NewLimitedConcurrentExecutor(int(sub.SubscriberConfig.SlidingWindowSize))
onDataMessageFn := func(m *mq_pb.SubscribeMessageResponse_Data) {
executors.Execute(func() {
processErr := sub.OnEachMessageFunc(m.Data.Key, m.Data.Value)
if processErr == nil {
sub.PartitionOffsetChan <- KeyedOffset{
Key: m.Data.Key,
Offset: m.Data.TsNs,
}
}
})
}
err := sub.onEachPartition(assigned, stopChan, onDataMessageFn)
if err != nil {
glog.V(0).Infof("subscriber %s/%s partition %+v at %v: %v", sub.ContentConfig.Topic, sub.SubscriberConfig.ConsumerGroup, assigned.Partition, assigned.LeaderBroker, err)
} else {

View File

@@ -3,9 +3,9 @@ package sub_client
import (
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"google.golang.org/grpc"
"sync"
"time"
)
type SubscriberConfiguration struct {
@@ -14,16 +14,16 @@ type SubscriberConfiguration struct {
ConsumerGroupInstanceId string
GrpcDialOption grpc.DialOption
MaxPartitionCount int32 // how many partitions to process concurrently
PerPartitionConcurrency int32 // how many messages to process concurrently per partition
SlidingWindowSize int32 // how many messages to process concurrently per partition
}
type ContentConfiguration struct {
Topic topic.Topic
Filter string
StartTime time.Time
StopTime time.Time
Topic topic.Topic
Filter string
PartitionOffsets []*schema_pb.PartitionOffset
}
type OnDataMessageFn func(m *mq_pb.SubscribeMessageResponse_Data)
type OnEachMessageFunc func(key, value []byte) (err error)
type OnCompletionFunc func()
@@ -32,15 +32,17 @@ type TopicSubscriber struct {
ContentConfig *ContentConfiguration
brokerPartitionAssignmentChan chan *mq_pb.SubscriberToSubCoordinatorResponse
brokerPartitionAssignmentAckChan chan *mq_pb.SubscriberToSubCoordinatorRequest
OnDataMessageFnnc OnDataMessageFn
OnEachMessageFunc OnEachMessageFunc
OnCompletionFunc OnCompletionFunc
bootstrapBrokers []string
waitForMoreMessage bool
activeProcessors map[topic.Partition]*ProcessorState
activeProcessorsLock sync.Mutex
PartitionOffsetChan chan KeyedOffset
}
func NewTopicSubscriber(bootstrapBrokers []string, subscriber *SubscriberConfiguration, content *ContentConfiguration) *TopicSubscriber {
func NewTopicSubscriber(bootstrapBrokers []string, subscriber *SubscriberConfiguration, content *ContentConfiguration, partitionOffsetChan chan KeyedOffset) *TopicSubscriber {
return &TopicSubscriber{
SubscriberConfig: subscriber,
ContentConfig: content,
@@ -49,6 +51,7 @@ func NewTopicSubscriber(bootstrapBrokers []string, subscriber *SubscriberConfigu
bootstrapBrokers: bootstrapBrokers,
waitForMoreMessage: true,
activeProcessors: make(map[topic.Partition]*ProcessorState),
PartitionOffsetChan: partitionOffsetChan,
}
}
@@ -56,6 +59,10 @@ func (sub *TopicSubscriber) SetEachMessageFunc(onEachMessageFn OnEachMessageFunc
sub.OnEachMessageFunc = onEachMessageFn
}
func (sub *TopicSubscriber) SetOnDataMessageFn(fn OnDataMessageFn) {
sub.OnDataMessageFnnc = fn
}
func (sub *TopicSubscriber) SetCompletionFunc(onCompletionFn OnCompletionFunc) {
sub.OnCompletionFunc = onCompletionFn
}