merge current message queue code changes (#6201)

* listing files to convert to parquet

* write parquet files

* save logs into parquet files

* pass by value

* compact logs into parquet format

* can skip existing files

* refactor

* refactor

* fix compilation

* when no partition found

* refactor

* add untested parquet file read

* rename package

* refactor

* rename files

* remove unused

* add merged log read func

* parquet wants to know the file size

* rewind by time

* pass in stop ts

* add stop ts

* adjust log

* minor

* adjust log

* skip .parquet files when reading message logs

* skip non message files

* Update subscriber_record.go

* send messages

* skip message data with only ts

* skip non log files

* update parquet-go package

* ensure a valid record type

* add new field to a record type

* Update read_parquet_to_log.go

* fix parquet file name generation

* separating reading parquet and logs

* add key field

* add skipped logs

* use in memory cache

* refactor

* refactor

* refactor

* refactor, and change compact log

* refactor

* rename

* refactor

* fix format

* prefix v to version directory
This commit is contained in:
Chris Lu
2024-11-04 12:08:25 -08:00
committed by GitHub
parent ffe908371d
commit dc784bf217
33 changed files with 1106 additions and 264 deletions

View File

@@ -0,0 +1,41 @@
package logstore
import (
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
)
func GenMergedReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
fromParquetFn := GenParquetReadFunc(filerClient, t, p)
readLogDirectFn := GenLogOnDiskReadFunc(filerClient, t, p)
return mergeReadFuncs(fromParquetFn, readLogDirectFn)
}
func mergeReadFuncs(fromParquetFn, readLogDirectFn log_buffer.LogReadFromDiskFuncType) log_buffer.LogReadFromDiskFuncType {
var exhaustedParquet bool
var lastProcessedPosition log_buffer.MessagePosition
return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
if !exhaustedParquet {
// glog.V(4).Infof("reading from parquet startPosition: %v\n", startPosition.UTC())
lastReadPosition, isDone, err = fromParquetFn(startPosition, stopTsNs, eachLogEntryFn)
// glog.V(4).Infof("read from parquet: %v %v %v %v\n", startPosition, lastReadPosition, isDone, err)
if isDone {
isDone = false
}
if err != nil {
return
}
lastProcessedPosition = lastReadPosition
}
exhaustedParquet = true
if startPosition.Before(lastProcessedPosition.Time) {
startPosition = lastProcessedPosition
}
// glog.V(4).Infof("reading from direct log startPosition: %v\n", startPosition.UTC())
lastReadPosition, isDone, err = readLogDirectFn(startPosition, stopTsNs, eachLogEntryFn)
return
}
}