merge current message queue code changes (#6201)

* listing files to convert to parquet

* write parquet files

* save logs into parquet files

* pass by value

* compact logs into parquet format

* can skip existing files

* refactor

* refactor

* fix compilation

* when no partition found

* refactor

* add untested parquet file read

* rename package

* refactor

* rename files

* remove unused

* add merged log read func

* parquet wants to know the file size

* rewind by time

* pass in stop ts

* add stop ts

* adjust log

* minor

* adjust log

* skip .parquet files when reading message logs

* skip non message files

* Update subscriber_record.go

* send messages

* skip message data with only ts

* skip non log files

* update parquet-go package

* ensure a valid record type

* add new field to a record type

* Update read_parquet_to_log.go

* fix parquet file name generation

* separating reading parquet and logs

* add key field

* add skipped logs

* use in memory cache

* refactor

* refactor

* refactor

* refactor, and change compact log

* refactor

* rename

* refactor

* fix format

* prefix v to version directory
This commit is contained in:
Chris Lu
2024-11-04 12:08:25 -08:00
committed by GitHub
parent ffe908371d
commit dc784bf217
33 changed files with 1106 additions and 264 deletions

View File

@@ -7,11 +7,12 @@ import (
"github.com/seaweedfs/seaweedfs/weed/mq/schema"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"log"
"strings"
"sync"
"sync/atomic"
"time"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
)
var (
@@ -25,11 +26,17 @@ var (
namespace = flag.String("ns", "test", "namespace")
t = flag.String("t", "test", "t")
seedBrokers = flag.String("brokers", "localhost:17777", "seed brokers")
counter int32
)
func doPublish(publisher *pub_client.TopicPublisher, id int) {
startTime := time.Now()
for i := 0; i < *messageCount / *concurrency; i++ {
for {
i := atomic.AddInt32(&counter, 1)
if i > int32(*messageCount) {
break
}
// Simulate publishing a message
myRecord := genMyRecord(int32(i))
if err := publisher.PublishRecord(myRecord.Key, myRecord.ToRecordValue()); err != nil {
@@ -38,7 +45,7 @@ func doPublish(publisher *pub_client.TopicPublisher, id int) {
}
if *messageDelay > 0 {
time.Sleep(*messageDelay)
fmt.Printf("sent %+v\n", myRecord)
fmt.Printf("sent %+v\n", string(myRecord.Key))
}
}
if err := publisher.FinishPublish(); err != nil {

View File

@@ -8,12 +8,12 @@ import (
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/protobuf/proto"
"strings"
"time"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
)
var (
@@ -22,6 +22,7 @@ var (
seedBrokers = flag.String("brokers", "localhost:17777", "seed brokers")
maxPartitionCount = flag.Int("maxPartitionCount", 3, "max partition count")
perPartitionConcurrency = flag.Int("perPartitionConcurrency", 1, "per partition concurrency")
timeAgo = flag.Duration("timeAgo", 1*time.Hour, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
clientId = flag.Uint("client_id", uint(util.RandomInt32()), "client id")
)
@@ -65,7 +66,7 @@ func main() {
contentConfig := &sub_client.ContentConfiguration{
Topic: topic.NewTopic(*namespace, *t),
Filter: "",
StartTime: time.Unix(1, 1),
StartTime: time.Now().Add(-*timeAgo),
}
brokers := strings.Split(*seedBrokers, ",")
@@ -75,9 +76,13 @@ func main() {
subscriber.SetEachMessageFunc(func(key, value []byte) error {
counter++
record := &schema_pb.RecordValue{}
proto.Unmarshal(value, record)
fmt.Printf("record: %v\n", record)
time.Sleep(1300 * time.Millisecond)
err := proto.Unmarshal(value, record)
if err != nil {
fmt.Printf("unmarshal record value: %v\n", err)
} else {
fmt.Printf("%s %d: %v\n", string(key), len(value), record)
}
//time.Sleep(1300 * time.Millisecond)
return nil
})

View File

@@ -8,6 +8,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"io"
"reflect"
)
func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssignment, stopCh chan struct{}) error {
@@ -24,6 +25,11 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
perPartitionConcurrency = 1
}
var stopTsNs int64
if !sub.ContentConfig.StopTime.IsZero() {
stopTsNs = sub.ContentConfig.StopTime.UnixNano()
}
if err = subscribeClient.Send(&mq_pb.SubscribeMessageRequest{
Message: &mq_pb.SubscribeMessageRequest_Init{
Init: &mq_pb.SubscribeMessageRequest_InitMessage{
@@ -32,6 +38,8 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
Topic: sub.ContentConfig.Topic.ToPbTopic(),
PartitionOffset: &mq_pb.PartitionOffset{
Partition: assigned.Partition,
StartTsNs: sub.ContentConfig.StartTime.UnixNano(),
StopTsNs: stopTsNs,
StartType: mq_pb.PartitionOffsetStartType_EARLIEST_IN_MEMORY,
},
Filter: sub.ContentConfig.Filter,
@@ -101,6 +109,10 @@ func (sub *TopicSubscriber) onEachPartition(assigned *mq_pb.BrokerPartitionAssig
glog.V(2).Infof("subscriber %s received control from producer:%s isClose:%v", sub.SubscriberConfig.ConsumerGroup, m.Data.Ctrl.PublisherName, m.Data.Ctrl.IsClose)
continue
}
if len(m.Data.Key) == 0 {
fmt.Printf("empty key %+v, type %v\n", m, reflect.TypeOf(m))
continue
}
executors.Execute(func() {
processErr := sub.OnEachMessageFunc(m.Data.Key, m.Data.Value)
if processErr == nil {

View File

@@ -21,6 +21,7 @@ type ContentConfiguration struct {
Topic topic.Topic
Filter string
StartTime time.Time
StopTime time.Time
}
type OnEachMessageFunc func(key, value []byte) (err error)