Prune Query Engine (#8865)

* chore(weed/query/engine): prune unused functions

* chore(weed/query/engine): prune unused test-only function
This commit is contained in:
Lars Lehtonen
2026-03-31 20:53:41 -07:00
committed by GitHub
parent 75a6a34528
commit 80d3085d54
7 changed files with 0 additions and 737 deletions

View File

@@ -320,12 +320,6 @@ func (hms *HybridMessageScanner) ScanWithStats(ctx context.Context, options Hybr
return results, stats, nil
}
// scanUnflushedData queries brokers for unflushed in-memory data using buffer_start deduplication
func (hms *HybridMessageScanner) scanUnflushedData(ctx context.Context, partition topic.Partition, options HybridScanOptions) ([]HybridScanResult, error) {
results, _, err := hms.scanUnflushedDataWithStats(ctx, partition, options)
return results, err
}
// scanUnflushedDataWithStats queries brokers for unflushed data and returns statistics
func (hms *HybridMessageScanner) scanUnflushedDataWithStats(ctx context.Context, partition topic.Partition, options HybridScanOptions) ([]HybridScanResult, *HybridScanStats, error) {
var results []HybridScanResult
@@ -436,27 +430,6 @@ func (hms *HybridMessageScanner) scanUnflushedDataWithStats(ctx context.Context,
return results, stats, nil
}
// convertDataMessageToRecord converts mq_pb.DataMessage to schema_pb.RecordValue
func (hms *HybridMessageScanner) convertDataMessageToRecord(msg *mq_pb.DataMessage) (*schema_pb.RecordValue, string, error) {
// Parse the message data as RecordValue
recordValue := &schema_pb.RecordValue{}
if err := proto.Unmarshal(msg.Value, recordValue); err != nil {
return nil, "", fmt.Errorf("failed to unmarshal message data: %v", err)
}
// Add system columns
if recordValue.Fields == nil {
recordValue.Fields = make(map[string]*schema_pb.Value)
}
// Add timestamp
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
Kind: &schema_pb.Value_Int64Value{Int64Value: msg.TsNs},
}
return recordValue, string(msg.Key), nil
}
// discoverTopicPartitions discovers the actual partitions for this topic by scanning the filesystem
// This finds real partition directories like v2025-09-01-07-16-34/0000-0630/
func (hms *HybridMessageScanner) discoverTopicPartitions(ctx context.Context) ([]topic.Partition, error) {
@@ -521,15 +494,6 @@ func (hms *HybridMessageScanner) discoverTopicPartitions(ctx context.Context) ([
return allPartitions, nil
}
// scanPartitionHybrid scans a specific partition using the hybrid approach
// This is where the magic happens - seamlessly reading ALL data sources:
// 1. Unflushed in-memory data from brokers (REAL-TIME)
// 2. Live logs + Parquet files from disk (FLUSHED/ARCHIVED)
func (hms *HybridMessageScanner) scanPartitionHybrid(ctx context.Context, partition topic.Partition, options HybridScanOptions) ([]HybridScanResult, error) {
results, _, err := hms.scanPartitionHybridWithStats(ctx, partition, options)
return results, err
}
// scanPartitionHybridWithStats scans a specific partition using streaming merge for memory efficiency
// PERFORMANCE IMPROVEMENT: Uses heap-based streaming merge instead of collecting all data and sorting
// - Memory usage: O(k) where k = number of data sources, instead of O(n) where n = total records
@@ -647,23 +611,6 @@ func (hms *HybridMessageScanner) countLiveLogFiles(partition topic.Partition) (i
return fileCount, nil
}
// isControlEntry checks if a log entry is a control entry without actual data
// Based on MQ system analysis, control entries are:
// 1. DataMessages with populated Ctrl field (publisher close signals)
// 2. Entries with empty keys (as filtered by subscriber)
// NOTE: Messages with empty data but valid keys (like NOOP messages) are NOT control entries
func (hms *HybridMessageScanner) isControlEntry(logEntry *filer_pb.LogEntry) bool {
// Pre-decode DataMessage if needed
var dataMessage *mq_pb.DataMessage
if len(logEntry.Data) > 0 {
dataMessage = &mq_pb.DataMessage{}
if err := proto.Unmarshal(logEntry.Data, dataMessage); err != nil {
dataMessage = nil // Failed to decode, treat as raw data
}
}
return hms.isControlEntryWithDecoded(logEntry, dataMessage)
}
// isControlEntryWithDecoded checks if a log entry is a control entry using pre-decoded DataMessage
// This avoids duplicate protobuf unmarshaling when the DataMessage is already decoded
func (hms *HybridMessageScanner) isControlEntryWithDecoded(logEntry *filer_pb.LogEntry, dataMessage *mq_pb.DataMessage) bool {
@@ -682,26 +629,6 @@ func (hms *HybridMessageScanner) isControlEntryWithDecoded(logEntry *filer_pb.Lo
return false
}
// isNullOrEmpty checks if a schema_pb.Value is null or empty
func isNullOrEmpty(value *schema_pb.Value) bool {
if value == nil {
return true
}
switch v := value.Kind.(type) {
case *schema_pb.Value_StringValue:
return v.StringValue == ""
case *schema_pb.Value_BytesValue:
return len(v.BytesValue) == 0
case *schema_pb.Value_ListValue:
return v.ListValue == nil || len(v.ListValue.Values) == 0
case nil:
return true // No kind set means null
default:
return false
}
}
// isSchemaless checks if the scanner is configured for a schema-less topic
// Schema-less topics only have system fields: _ts_ns, _key, and _value
func (hms *HybridMessageScanner) isSchemaless() bool {
@@ -736,61 +663,6 @@ func (hms *HybridMessageScanner) isSchemaless() bool {
return hasValue && dataFieldCount == 1
}
// convertLogEntryToRecordValue converts a filer_pb.LogEntry to schema_pb.RecordValue
// This handles both:
// 1. Live log entries (raw message format)
// 2. Parquet entries (already in schema_pb.RecordValue format)
// 3. Schema-less topics (raw bytes in _value field)
func (hms *HybridMessageScanner) convertLogEntryToRecordValue(logEntry *filer_pb.LogEntry) (*schema_pb.RecordValue, string, error) {
// For schema-less topics, put raw data directly into _value field
if hms.isSchemaless() {
recordValue := &schema_pb.RecordValue{
Fields: make(map[string]*schema_pb.Value),
}
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
}
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key},
}
recordValue.Fields[SW_COLUMN_NAME_VALUE] = &schema_pb.Value{
Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Data},
}
return recordValue, "live_log", nil
}
// Try to unmarshal as RecordValue first (Parquet format)
recordValue := &schema_pb.RecordValue{}
if err := proto.Unmarshal(logEntry.Data, recordValue); err == nil {
// This is an archived message from Parquet files
// FIX: Add system columns from LogEntry to RecordValue
if recordValue.Fields == nil {
recordValue.Fields = make(map[string]*schema_pb.Value)
}
// Add system columns from LogEntry
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
}
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key},
}
return recordValue, "parquet_archive", nil
}
// If not a RecordValue, this is raw live message data - parse with schema
return hms.parseRawMessageWithSchema(logEntry)
}
// min returns the minimum of two integers
func min(a, b int) int {
if a < b {
return a
}
return b
}
// parseRawMessageWithSchema parses raw live message data using the topic's schema
// This provides proper type conversion and field mapping instead of treating everything as strings
func (hms *HybridMessageScanner) parseRawMessageWithSchema(logEntry *filer_pb.LogEntry) (*schema_pb.RecordValue, string, error) {