chore: remove ~50k lines of unreachable dead code (#8913)

* chore: remove unreachable dead code across the codebase

Remove ~50,000 lines of unreachable code identified by static analysis.

Major removals:
- weed/filer/redis_lua: entire unused Redis Lua filer store implementation
- weed/wdclient/net2, resource_pool: unused connection/resource pool packages
- weed/plugin/worker/lifecycle: unused lifecycle plugin worker
- weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy,
  multipart IAM, key rotation, and various SSE helper functions
- weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions
- weed/mq/offset: unused SQL storage and migration code
- weed/worker: unused registry, task, and monitoring functions
- weed/query: unused SQL engine, parquet scanner, and type functions
- weed/shell: unused EC proportional rebalance functions
- weed/storage/erasure_coding/distribution: unused distribution analysis functions
- Individual unreachable functions removed from 150+ files across admin,
  credential, filer, iam, kms, mount, mq, operation, pb, s3api, server,
  shell, storage, topology, and util packages

* fix(s3): reset shared memory store in IAM test to prevent flaky failure

TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because
the MemoryStore credential backend is a singleton registered via init().
Earlier tests that create anonymous identities pollute the shared store,
causing LookupAnonymous() to unexpectedly return true.

Fix by calling Reset() on the memory store before the test runs.

* style: run gofmt on changed files

* fix: restore KMS functions used by integration tests

* fix(plugin): prevent panic on send to closed worker session channel

The Plugin.sendToWorker method could panic with "send on closed channel"
when a worker disconnected while a message was being sent. The race was
between streamSession.close() closing the outgoing channel and sendToWorker
writing to it concurrently.

Add a done channel to streamSession that is closed before the outgoing
channel, and check it in sendToWorker's select to safely detect closed
sessions without panicking.
This commit is contained in:
Chris Lu
2026-04-03 16:04:27 -07:00
committed by GitHub
parent 8fad85aed7
commit 995dfc4d5d
264 changed files with 62 additions and 46027 deletions

View File

@@ -74,11 +74,6 @@ func (opt *FastPathOptimizer) DetermineStrategy(aggregations []AggregationSpec)
return strategy
}
// CollectDataSources gathers information about available data sources for a topic
func (opt *FastPathOptimizer) CollectDataSources(ctx context.Context, hybridScanner *HybridMessageScanner) (*TopicDataSources, error) {
	// Delegate to the time-filtered variant; a zero start/stop pair means
	// "no time filtering", i.e. consider every file for the topic.
	const noTimeBound = int64(0)
	return opt.CollectDataSourcesWithTimeFilter(ctx, hybridScanner, noTimeBound, noTimeBound)
}
// CollectDataSourcesWithTimeFilter gathers information about available data sources for a topic
// with optional time filtering to skip irrelevant parquet files
func (opt *FastPathOptimizer) CollectDataSourcesWithTimeFilter(ctx context.Context, hybridScanner *HybridMessageScanner, startTimeNs, stopTimeNs int64) (*TopicDataSources, error) {

View File

@@ -539,20 +539,6 @@ func NewSQLEngine(masterAddress string) *SQLEngine {
}
}
// NewSQLEngineWithCatalog creates a new SQL execution engine with a custom catalog.
// Used for testing or when you want to provide a pre-configured catalog.
func NewSQLEngineWithCatalog(catalog *SchemaCatalog) *SQLEngine {
	// The engine reads partition data from the filer over HTTP, so the
	// process-wide HTTP client must exist before any query runs.
	if util_http.GetGlobalHttpClient() == nil {
		util_http.InitGlobalHttpClient()
	}
	engine := &SQLEngine{catalog: catalog}
	return engine
}
// GetCatalog returns the schema catalog for external access
func (e *SQLEngine) GetCatalog() *SchemaCatalog {
return e.catalog
@@ -3682,11 +3668,6 @@ type ExecutionPlanBuilder struct {
engine *SQLEngine
}
// NewExecutionPlanBuilder creates a new execution plan builder bound to the given engine.
func NewExecutionPlanBuilder(engine *SQLEngine) *ExecutionPlanBuilder {
	builder := &ExecutionPlanBuilder{engine: engine}
	return builder
}
// BuildAggregationPlan builds an execution plan for aggregation queries
func (builder *ExecutionPlanBuilder) BuildAggregationPlan(
stmt *SelectStatement,

File diff suppressed because it is too large Load Diff

View File

@@ -44,7 +44,7 @@ type ParseError struct {
func (e ParseError) Error() string {
if e.Cause != nil {
return fmt.Sprintf("SQL parse error: %s (%v)", e.Message, e.Cause)
return fmt.Sprintf("SQL parse error: %s (caused by: %v)", e.Message, e.Cause)
}
return fmt.Sprintf("SQL parse error: %s", e.Message)
}

View File

@@ -1,133 +0,0 @@
package engine
import (
"testing"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"github.com/stretchr/testify/assert"
)
// TestExecutionPlanFastPathDisplay tests that the execution plan correctly shows
// "Parquet Statistics (fast path)" when fast path is used, not "Parquet Files (full scan)"
func TestExecutionPlanFastPathDisplay(t *testing.T) {
engine := NewMockSQLEngine()
// Create realistic data sources for fast path scenario
// This outer dataSources is used by the second and third subtests below.
dataSources := &TopicDataSources{
ParquetFiles: map[string][]*ParquetFileStats{
"/topics/test/topic/partition-1": {
{
RowCount: 500,
ColumnStats: map[string]*ParquetColumnStats{
"id": {
ColumnName: "id",
MinValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1}},
MaxValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 500}},
NullCount: 0,
RowCount: 500,
},
},
},
},
},
ParquetRowCount: 500,
LiveLogRowCount: 0, // Pure parquet scenario - ideal for fast path
PartitionsCount: 1,
}
t.Run("Fast path execution plan shows correct data sources", func(t *testing.T) {
optimizer := NewFastPathOptimizer(engine.SQLEngine)
aggregations := []AggregationSpec{
{Function: FuncCOUNT, Column: "*", Alias: "COUNT(*)"},
}
// Test the strategy determination
strategy := optimizer.DetermineStrategy(aggregations)
assert.True(t, strategy.CanUseFastPath, "Strategy should allow fast path for COUNT(*)")
assert.Equal(t, "all_aggregations_supported", strategy.Reason)
// Test data source list building
// NOTE(review): buildDataSourcesList appears not to touch builder.engine,
// hence the zero-value builder — confirm before relying on it elsewhere.
builder := &ExecutionPlanBuilder{}
// NOTE(review): this local dataSources shadows the outer one of the same
// name; the outer value (with ColumnStats) is intentionally unused here.
dataSources := &TopicDataSources{
ParquetFiles: map[string][]*ParquetFileStats{
"/topics/test/topic/partition-1": {
{RowCount: 500},
},
},
ParquetRowCount: 500,
LiveLogRowCount: 0,
PartitionsCount: 1,
}
dataSourcesList := builder.buildDataSourcesList(strategy, dataSources)
// When fast path is used, should show "parquet_stats" not "parquet_files"
assert.Contains(t, dataSourcesList, "parquet_stats",
"Data sources should contain 'parquet_stats' when fast path is used")
assert.NotContains(t, dataSourcesList, "parquet_files",
"Data sources should NOT contain 'parquet_files' when fast path is used")
// Test that the formatting works correctly
formattedSource := engine.SQLEngine.formatDataSource("parquet_stats")
assert.Equal(t, "Parquet Statistics (fast path)", formattedSource,
"parquet_stats should format to 'Parquet Statistics (fast path)'")
formattedFullScan := engine.SQLEngine.formatDataSource("parquet_files")
assert.Equal(t, "Parquet Files (full scan)", formattedFullScan,
"parquet_files should format to 'Parquet Files (full scan)'")
})
// This subtest reads the OUTER dataSources defined at the top of the test.
t.Run("Slow path execution plan shows full scan data sources", func(t *testing.T) {
builder := &ExecutionPlanBuilder{}
// Create strategy that cannot use fast path
strategy := AggregationStrategy{
CanUseFastPath: false,
Reason:         "unsupported_aggregation_functions",
}
dataSourcesList := builder.buildDataSourcesList(strategy, dataSources)
// When slow path is used, should show "parquet_files" and "live_logs"
assert.Contains(t, dataSourcesList, "parquet_files",
"Slow path should contain 'parquet_files'")
assert.Contains(t, dataSourcesList, "live_logs",
"Slow path should contain 'live_logs'")
assert.NotContains(t, dataSourcesList, "parquet_stats",
"Slow path should NOT contain 'parquet_stats'")
})
t.Run("Data source formatting works correctly", func(t *testing.T) {
// Test just the data source formatting which is the key fix
// Test parquet_stats formatting (fast path)
fastPathFormatted := engine.SQLEngine.formatDataSource("parquet_stats")
assert.Equal(t, "Parquet Statistics (fast path)", fastPathFormatted,
"parquet_stats should format to show fast path usage")
// Test parquet_files formatting (slow path)
slowPathFormatted := engine.SQLEngine.formatDataSource("parquet_files")
assert.Equal(t, "Parquet Files (full scan)", slowPathFormatted,
"parquet_files should format to show full scan")
// Test that data sources list is built correctly for fast path
builder := &ExecutionPlanBuilder{}
fastStrategy := AggregationStrategy{CanUseFastPath: true}
fastSources := builder.buildDataSourcesList(fastStrategy, dataSources)
assert.Contains(t, fastSources, "parquet_stats",
"Fast path should include parquet_stats")
assert.NotContains(t, fastSources, "parquet_files",
"Fast path should NOT include parquet_files")
// Test that data sources list is built correctly for slow path
slowStrategy := AggregationStrategy{CanUseFastPath: false}
slowSources := builder.buildDataSourcesList(slowStrategy, dataSources)
assert.Contains(t, slowSources, "parquet_files",
"Slow path should include parquet_files")
assert.NotContains(t, slowSources, "parquet_stats",
"Slow path should NOT include parquet_stats")
})
}

View File

@@ -1,193 +0,0 @@
package engine
import (
"context"
"testing"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"github.com/stretchr/testify/assert"
)
// TestFastPathCountFixRealistic tests the specific scenario mentioned in the bug report:
// Fast path returning 0 for COUNT(*) when slow path returns 1803
func TestFastPathCountFixRealistic(t *testing.T) {
engine := NewMockSQLEngine()
// Set up debug mode to see our new logging
// NOTE(review): a string context key trips `go vet` (SA1029); the engine
// presumably looks up the literal "debug" key — keep the two in sync.
ctx := context.WithValue(context.Background(), "debug", true)
// Create realistic data sources that mimic a scenario with 1803 rows:
// two partitions of parquet stats (800 + 500 + 300) plus 203 live-log rows.
dataSources := &TopicDataSources{
ParquetFiles: map[string][]*ParquetFileStats{
"/topics/test/large-topic/0000-1023": {
{
RowCount: 800,
ColumnStats: map[string]*ParquetColumnStats{
"id": {
ColumnName: "id",
MinValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1}},
MaxValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 800}},
NullCount: 0,
RowCount: 800,
},
},
},
{
RowCount: 500,
ColumnStats: map[string]*ParquetColumnStats{
"id": {
ColumnName: "id",
MinValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 801}},
MaxValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1300}},
NullCount: 0,
RowCount: 500,
},
},
},
},
"/topics/test/large-topic/1024-2047": {
{
RowCount: 300,
ColumnStats: map[string]*ParquetColumnStats{
"id": {
ColumnName: "id",
MinValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1301}},
MaxValue: &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1600}},
NullCount: 0,
RowCount: 300,
},
},
},
},
},
ParquetRowCount: 1600, // 800 + 500 + 300
LiveLogRowCount: 203, // Additional live log data
PartitionsCount: 2,
LiveLogFilesCount: 15,
}
// Partition paths matching the keys of ParquetFiles above.
partitions := []string{
"/topics/test/large-topic/0000-1023",
"/topics/test/large-topic/1024-2047",
}
t.Run("COUNT(*) should return correct total (1803)", func(t *testing.T) {
computer := NewAggregationComputer(engine.SQLEngine)
aggregations := []AggregationSpec{
{Function: FuncCOUNT, Column: "*", Alias: "COUNT(*)"},
}
results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
assert.NoError(t, err, "Fast path aggregation should not error")
assert.Len(t, results, 1, "Should return one result")
// This is the key test - before our fix, this was returning 0
expectedCount := int64(1803) // 1600 (parquet) + 203 (live log)
actualCount := results[0].Count
assert.Equal(t, expectedCount, actualCount,
"COUNT(*) should return %d (1600 parquet + 203 live log), but got %d",
expectedCount, actualCount)
})
t.Run("MIN/MAX should work with multiple partitions", func(t *testing.T) {
computer := NewAggregationComputer(engine.SQLEngine)
aggregations := []AggregationSpec{
{Function: FuncMIN, Column: "id", Alias: "MIN(id)"},
{Function: FuncMAX, Column: "id", Alias: "MAX(id)"},
}
results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
assert.NoError(t, err, "Fast path aggregation should not error")
assert.Len(t, results, 2, "Should return two results")
// MIN should be the lowest across all parquet files
assert.Equal(t, int64(1), results[0].Min, "MIN should be 1")
// MAX should be the highest across all parquet files
assert.Equal(t, int64(1600), results[1].Max, "MAX should be 1600")
})
}
// TestFastPathDataSourceDiscoveryLogging tests that our debug logging works correctly
func TestFastPathDataSourceDiscoveryLogging(t *testing.T) {
	// This test verifies that our enhanced data source collection structure is correct
	t.Run("DataSources structure validation", func(t *testing.T) {
		// Build an empty TopicDataSources the same way the collector does,
		// with every counter explicitly zeroed and the map pre-created.
		ds := &TopicDataSources{
			ParquetFiles:      map[string][]*ParquetFileStats{},
			ParquetRowCount:   0,
			LiveLogRowCount:   0,
			LiveLogFilesCount: 0,
			PartitionsCount:   0,
		}

		// The zero-initialized structure must be immediately usable.
		assert.NotNil(t, ds, "Data sources should not be nil")
		assert.NotNil(t, ds.ParquetFiles, "ParquetFiles map should be initialized")
		assert.GreaterOrEqual(t, ds.PartitionsCount, 0, "PartitionsCount should be non-negative")
		assert.GreaterOrEqual(t, ds.ParquetRowCount, int64(0), "ParquetRowCount should be non-negative")
		assert.GreaterOrEqual(t, ds.LiveLogRowCount, int64(0), "LiveLogRowCount should be non-negative")
	})
}
// TestFastPathValidationLogic tests the enhanced validation we added
func TestFastPathValidationLogic(t *testing.T) {
	t.Run("Validation catches data source vs computation mismatch", func(t *testing.T) {
		// Data sources report 1000 parquet rows and no live-log rows...
		sources := &TopicDataSources{
			ParquetFiles:    make(map[string][]*ParquetFileStats),
			ParquetRowCount: 1000, // Data sources say 1000 rows
			LiveLogRowCount: 0,
			PartitionsCount: 1,
		}
		// ...but the aggregation result claims 0 — simulating the original bug.
		aggResults := []AggregationResult{
			{Count: 0}, // Bug: returns 0 when data sources show 1000
		}

		// Mirror the validation logic from tryFastParquetAggregation.
		expectedTotal := sources.ParquetRowCount + sources.LiveLogRowCount
		got := aggResults[0].Count

		// The mismatch must be detected.
		assert.NotEqual(t, expectedTotal, got,
			"This test simulates the bug: data sources show %d but COUNT returns %d",
			expectedTotal, got)

		// In the real code this failure triggers a fallback to the slow path.
		validationPassed := got == expectedTotal
		assert.False(t, validationPassed, "Validation should fail for inconsistent data")
	})

	t.Run("Validation passes for consistent data", func(t *testing.T) {
		// Here everything lines up: 1000 parquet + 803 live-log rows.
		sources := &TopicDataSources{
			ParquetFiles:    make(map[string][]*ParquetFileStats),
			ParquetRowCount: 1000,
			LiveLogRowCount: 803,
			PartitionsCount: 1,
		}
		// Aggregation result matches the data sources.
		aggResults := []AggregationResult{
			{Count: 1803}, // Correct: matches 1000 + 803
		}

		expectedTotal := sources.ParquetRowCount + sources.LiveLogRowCount
		got := aggResults[0].Count

		assert.Equal(t, expectedTotal, got,
			"Validation should pass when data sources (%d) match COUNT result (%d)",
			expectedTotal, got)

		validationPassed := got == expectedTotal
		assert.True(t, validationPassed, "Validation should pass for consistent data")
	})
}

View File

@@ -1,280 +1,14 @@
package engine
import (
	"context"
	"fmt"
	"math/big"
	"sort"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/mq/schema"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
	"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
)
// ParquetScanner scans MQ topic Parquet files for SELECT queries
// Assumptions:
// 1. All MQ messages are stored in Parquet format in topic partitions
// 2. Each partition directory contains dated Parquet files
// 3. System columns (_ts_ns, _key) are added to user schema
// 4. Predicate pushdown is used for efficient scanning
type ParquetScanner struct {
filerClient filer_pb.FilerClient // client used to read topic config and partition files
chunkCache chunk_cache.ChunkCache // in-memory chunk cache (same setup as MQ logstore)
topic topic.Topic // namespace + name of the topic being scanned
recordSchema *schema_pb.RecordType // user schema extended with the system columns above
parquetLevels *schema.ParquetLevels // precomputed Parquet levels derived from recordSchema
}
// NewParquetScanner creates a scanner for a specific MQ topic.
// Assumption: Topic exists and has Parquet files in partition directories.
// It reads the topic configuration through filerClient to build the record
// schema (user fields plus system columns) and precomputes the Parquet
// levels needed for efficient reading.
func NewParquetScanner(filerClient filer_pb.FilerClient, namespace, topicName string) (*ParquetScanner, error) {
	// A filer client is mandatory: both the config read below and later
	// partition scans go through it.
	if filerClient == nil {
		return nil, fmt.Errorf("filerClient is required but not available")
	}
	// Create topic reference
	t := topic.Topic{
		Namespace: namespace,
		Name:      topicName,
	}
	// Read topic configuration to get schema
	var topicConf *mq_pb.ConfigureTopicResponse
	var err error
	if err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		topicConf, err = t.ReadConfFile(client)
		return err
	}); err != nil {
		// Fix: wrap with %w (was %v) so callers can inspect the underlying
		// filer error with errors.Is / errors.As.
		return nil, fmt.Errorf("failed to read topic config: %w", err)
	}
	// Build complete schema with system columns - prefer flat schema if available
	var recordType *schema_pb.RecordType
	if topicConf.GetMessageRecordType() != nil {
		// New flat schema format - use directly
		recordType = topicConf.GetMessageRecordType()
	}
	if recordType == nil || len(recordType.Fields) == 0 {
		// For topics without schema, create a minimal schema with system fields and _value
		recordType = schema.RecordTypeBegin().
			WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
			WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
			WithField(SW_COLUMN_NAME_VALUE, schema.TypeBytes). // Raw message value
			RecordTypeEnd()
	} else {
		// Add system columns that MQ adds to all records
		recordType = schema.NewRecordTypeBuilder(recordType).
			WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
			WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
			RecordTypeEnd()
	}
	// Convert to Parquet levels for efficient reading
	parquetLevels, err := schema.ToParquetLevels(recordType)
	if err != nil {
		// Fix: wrap with %w (was %v) to preserve the error chain.
		return nil, fmt.Errorf("failed to create Parquet levels: %w", err)
	}
	return &ParquetScanner{
		filerClient:   filerClient,
		chunkCache:    chunk_cache.NewChunkCacheInMemory(256), // Same as MQ logstore
		topic:         t,
		recordSchema:  recordType,
		parquetLevels: parquetLevels,
	}, nil
}
// ScanOptions configure how the scanner reads data
type ScanOptions struct {
// Time range filtering (Unix nanoseconds)
// NOTE(review): zero values presumably mean "unbounded" — confirm against
// the scan implementation, which currently ignores these fields.
StartTimeNs int64
StopTimeNs int64
// Column projection - if empty, select all columns
Columns []string
// Row limit - 0 means no limit
Limit int
// Predicate for WHERE clause filtering; applied after reading, a nil
// predicate accepts every record
Predicate func(*schema_pb.RecordValue) bool
}
// ScanResult represents a single scanned record
type ScanResult struct {
Values map[string]*schema_pb.Value // Column name -> value (user columns only)
Timestamp int64 // Message timestamp (_ts_ns)
Key []byte // Message key (_key)
}
// Scan reads records from the topic's Parquet files
// Assumptions:
// 1. Scans all partitions of the topic
// 2. Applies time filtering at Parquet level for efficiency
// 3. Applies predicates and projections after reading
func (ps *ParquetScanner) Scan(ctx context.Context, options ScanOptions) ([]ScanResult, error) {
	// TODO: Implement proper partition discovery.
	// For now, assume a single partition 0 covering the default range.
	partitions := []topic.Partition{{RangeStart: 0, RangeStop: 1000}}

	var collected []ScanResult
	for _, p := range partitions {
		rows, err := ps.scanPartition(ctx, p, options)
		if err != nil {
			return nil, fmt.Errorf("failed to scan partition %v: %v", p, err)
		}
		collected = append(collected, rows...)

		// Enforce the row limit globally, across all partitions, and stop
		// scanning once it has been reached.
		if options.Limit > 0 && len(collected) >= options.Limit {
			collected = collected[:options.Limit]
			break
		}
	}
	return collected, nil
}
// scanPartition scans a specific topic partition
func (ps *ParquetScanner) scanPartition(ctx context.Context, partition topic.Partition, options ScanOptions) ([]ScanResult, error) {
// partitionDir := topic.PartitionDir(ps.topic, partition) // TODO: Use for actual file listing
var results []ScanResult
// List Parquet files in partition directory
// TODO: Implement proper file listing with date range filtering
// For now, this is a placeholder that would list actual Parquet files
// Simulate file processing - in real implementation, this would:
// 1. List files in partitionDir via filerClient
// 2. Filter files by date range if time filtering is enabled
// 3. Process each Parquet file in chronological order
// Placeholder: Create sample data for testing
// NOTE(review): results is never appended to above, so this condition is
// always true until the real file listing is implemented.
if len(results) == 0 {
// Generate sample data for demonstration
sampleData := ps.generateSampleData(options)
results = append(results, sampleData...)
}
return results, nil
}
// generateSampleData creates sample data for testing when no real Parquet files exist
// It fabricates three fixed user events relative to the current time, then
// applies the caller's predicate and limit so the placeholder behaves like a
// real scan would.
func (ps *ParquetScanner) generateSampleData(options ScanOptions) []ScanResult {
now := time.Now().UnixNano()
// Three hard-coded sample events; timestamps are offsets from now.
sampleData := []ScanResult{
{
Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "login"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1"}`}},
},
Timestamp: now - 3600000000000, // 1 hour ago
Key: []byte("user-1001"),
},
{
Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "page_view"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"page": "/dashboard"}`}},
},
Timestamp: now - 1800000000000, // 30 minutes ago
Key: []byte("user-1002"),
},
{
Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "logout"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"session_duration": 3600}`}},
},
Timestamp: now - 900000000000, // 15 minutes ago
Key: []byte("user-1001"),
},
}
// Apply predicate filtering if specified
if options.Predicate != nil {
var filtered []ScanResult
for _, result := range sampleData {
// Convert to RecordValue for predicate testing, including the
// system columns (_ts_ns, _key) that real scans expose.
recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
for k, v := range result.Values {
recordValue.Fields[k] = v
}
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
if options.Predicate(recordValue) {
filtered = append(filtered, result)
}
}
sampleData = filtered
}
// Apply limit
if options.Limit > 0 && len(sampleData) > options.Limit {
sampleData = sampleData[:options.Limit]
}
return sampleData
}
// ConvertToSQLResult converts ScanResults to SQL query results.
// When columns is empty, the column set is derived from the scanned records
// and sorted alphabetically so repeated queries produce a deterministic
// column layout. Columns missing from a particular record are emitted as NULL.
func (ps *ParquetScanner) ConvertToSQLResult(results []ScanResult, columns []string) *QueryResult {
	if len(results) == 0 {
		return &QueryResult{
			Columns: columns,
			Rows:    [][]sqltypes.Value{},
		}
	}
	// Determine columns if not specified
	if len(columns) == 0 {
		columnSet := make(map[string]bool)
		for _, result := range results {
			for columnName := range result.Values {
				columnSet[columnName] = true
			}
		}
		columns = make([]string, 0, len(columnSet))
		for columnName := range columnSet {
			columns = append(columns, columnName)
		}
		// Fix: Go map iteration order is randomized, which made the column
		// order differ between runs of the same query. Sort for stability.
		sort.Strings(columns)
	}
	// Convert to SQL rows
	rows := make([][]sqltypes.Value, len(results))
	for i, result := range results {
		row := make([]sqltypes.Value, len(columns))
		for j, columnName := range columns {
			if value, exists := result.Values[columnName]; exists {
				row[j] = convertSchemaValueToSQL(value)
			} else {
				row[j] = sqltypes.NULL
			}
		}
		rows[i] = row
	}
	return &QueryResult{
		Columns: columns,
		Rows:    rows,
	}
}
// convertSchemaValueToSQL converts schema_pb.Value to sqltypes.Value
func convertSchemaValueToSQL(value *schema_pb.Value) sqltypes.Value {
if value == nil {

View File

@@ -1,117 +0,0 @@
package engine
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestPartitionPathHandling tests that partition paths are handled correctly
// whether discoverTopicPartitions returns relative or absolute paths
func TestPartitionPathHandling(t *testing.T) {
engine := NewMockSQLEngine()
t.Run("Mock discoverTopicPartitions returns correct paths", func(t *testing.T) {
// Test that our mock engine handles absolute paths correctly
// NOTE(review): mockPartitions is seeded directly; the mock presumably
// echoes these values back from discoverTopicPartitions.
engine.mockPartitions["test.user_events"] = []string{
"/topics/test/user_events/v2025-09-03-15-36-29/0000-2520",
"/topics/test/user_events/v2025-09-03-15-36-29/2521-5040",
}
partitions, err := engine.discoverTopicPartitions("test", "user_events")
assert.NoError(t, err, "Should discover partitions without error")
assert.Equal(t, 2, len(partitions), "Should return 2 partitions")
assert.Contains(t, partitions[0], "/topics/test/user_events/", "Should contain absolute path")
})
t.Run("Mock discoverTopicPartitions handles relative paths", func(t *testing.T) {
// Test relative paths scenario (same key, so this overwrites the
// entries seeded by the previous subtest)
engine.mockPartitions["test.user_events"] = []string{
"v2025-09-03-15-36-29/0000-2520",
"v2025-09-03-15-36-29/2521-5040",
}
partitions, err := engine.discoverTopicPartitions("test", "user_events")
assert.NoError(t, err, "Should discover partitions without error")
assert.Equal(t, 2, len(partitions), "Should return 2 partitions")
assert.True(t, !strings.HasPrefix(partitions[0], "/topics/"), "Should be relative path")
})
t.Run("Partition path building logic works correctly", func(t *testing.T) {
topicBasePath := "/topics/test/user_events"
// Table of inputs covering both path forms the discovery code can return.
testCases := []struct {
name string
relativePartition string
expectedPath string
}{
{
name: "Absolute path - use as-is",
relativePartition: "/topics/test/user_events/v2025-09-03-15-36-29/0000-2520",
expectedPath: "/topics/test/user_events/v2025-09-03-15-36-29/0000-2520",
},
{
name: "Relative path - build full path",
relativePartition: "v2025-09-03-15-36-29/0000-2520",
expectedPath: "/topics/test/user_events/v2025-09-03-15-36-29/0000-2520",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var partitionPath string
// This is the same logic from our fixed code
if strings.HasPrefix(tc.relativePartition, "/topics/") {
// Already a full path - use as-is
partitionPath = tc.relativePartition
} else {
// Relative path - build full path
partitionPath = topicBasePath + "/" + tc.relativePartition
}
assert.Equal(t, tc.expectedPath, partitionPath,
"Partition path should be built correctly")
// Ensure no double slashes
assert.NotContains(t, partitionPath, "//",
"Partition path should not contain double slashes")
})
}
})
}
// TestPartitionPathLogic tests the core logic for handling partition paths
func TestPartitionPathLogic(t *testing.T) {
	t.Run("Building partition paths from discovered partitions", func(t *testing.T) {
		// Base directory of the topic whose partitions we resolve.
		topicBasePath := "/topics/ecommerce/user_events"

		// discoverTopicPartitions realistically returns absolute paths.
		discovered := []string{
			"/topics/ecommerce/user_events/v2025-09-03-15-36-29/0000-2520",
		}

		// Exercise the resolution logic from the fix directly: absolute
		// paths pass through untouched, relative ones are joined onto the
		// topic base path.
		resolved := make([]string, len(discovered))
		for i, p := range discovered {
			if strings.HasPrefix(p, "/topics/") {
				resolved[i] = p
			} else {
				resolved[i] = topicBasePath + "/" + p
			}
		}

		expectedPath := "/topics/ecommerce/user_events/v2025-09-03-15-36-29/0000-2520"
		assert.Equal(t, expectedPath, resolved[0], "Absolute path should be used as-is")

		// The original bug produced "//" when an absolute path was joined
		// onto the base path anyway.
		assert.NotContains(t, resolved[0], "//", "Path should not contain double slashes")
	})
}