chore: remove ~50k lines of unreachable dead code (#8913)
* chore: remove unreachable dead code across the codebase Remove ~50,000 lines of unreachable code identified by static analysis. Major removals: - weed/filer/redis_lua: entire unused Redis Lua filer store implementation - weed/wdclient/net2, resource_pool: unused connection/resource pool packages - weed/plugin/worker/lifecycle: unused lifecycle plugin worker - weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy, multipart IAM, key rotation, and various SSE helper functions - weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions - weed/mq/offset: unused SQL storage and migration code - weed/worker: unused registry, task, and monitoring functions - weed/query: unused SQL engine, parquet scanner, and type functions - weed/shell: unused EC proportional rebalance functions - weed/storage/erasure_coding/distribution: unused distribution analysis functions - Individual unreachable functions removed from 150+ files across admin, credential, filer, iam, kms, mount, mq, operation, pb, s3api, server, shell, storage, topology, and util packages * fix(s3): reset shared memory store in IAM test to prevent flaky failure TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because the MemoryStore credential backend is a singleton registered via init(). Earlier tests that create anonymous identities pollute the shared store, causing LookupAnonymous() to unexpectedly return true. Fix by calling Reset() on the memory store before the test runs. * style: run gofmt on changed files * fix: restore KMS functions used by integration tests * fix(plugin): prevent panic on send to closed worker session channel The Plugin.sendToWorker method could panic with "send on closed channel" when a worker disconnected while a message was being sent. The race was between streamSession.close() closing the outgoing channel and sendToWorker writing to it concurrently. 
Add a done channel to streamSession that is closed before the outgoing channel, and check it in sendToWorker's select to safely detect closed sessions without panicking.
This commit is contained in:
@@ -1,280 +1,14 @@
|
||||
package engine
|
||||
|
||||
import (
	"context"
	"fmt"
	"math/big"
	"sort"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/mq/schema"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
	"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
)
|
||||
|
||||
// ParquetScanner scans MQ topic Parquet files for SELECT queries.
//
// Assumptions:
// 1. All MQ messages are stored in Parquet format in topic partitions
// 2. Each partition directory contains dated Parquet files
// 3. System columns (_ts_ns, _key) are added to user schema
// 4. Predicate pushdown is used for efficient scanning
type ParquetScanner struct {
	// filerClient provides access to the filer for reading topic
	// configuration and partition files.
	filerClient filer_pb.FilerClient
	// chunkCache is an in-memory chunk cache (same setup as the MQ logstore).
	chunkCache chunk_cache.ChunkCache
	// topic identifies the namespace/name pair being scanned.
	topic topic.Topic
	// recordSchema is the user schema extended with the MQ system columns.
	recordSchema *schema_pb.RecordType
	// parquetLevels is precomputed from recordSchema for efficient reads.
	parquetLevels *schema.ParquetLevels
}
|
||||
|
||||
// NewParquetScanner creates a scanner for a specific MQ topic
|
||||
// Assumption: Topic exists and has Parquet files in partition directories
|
||||
func NewParquetScanner(filerClient filer_pb.FilerClient, namespace, topicName string) (*ParquetScanner, error) {
|
||||
// Check if filerClient is available
|
||||
if filerClient == nil {
|
||||
return nil, fmt.Errorf("filerClient is required but not available")
|
||||
}
|
||||
|
||||
// Create topic reference
|
||||
t := topic.Topic{
|
||||
Namespace: namespace,
|
||||
Name: topicName,
|
||||
}
|
||||
|
||||
// Read topic configuration to get schema
|
||||
var topicConf *mq_pb.ConfigureTopicResponse
|
||||
var err error
|
||||
if err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
||||
topicConf, err = t.ReadConfFile(client)
|
||||
return err
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("failed to read topic config: %v", err)
|
||||
}
|
||||
|
||||
// Build complete schema with system columns - prefer flat schema if available
|
||||
var recordType *schema_pb.RecordType
|
||||
|
||||
if topicConf.GetMessageRecordType() != nil {
|
||||
// New flat schema format - use directly
|
||||
recordType = topicConf.GetMessageRecordType()
|
||||
}
|
||||
|
||||
if recordType == nil || len(recordType.Fields) == 0 {
|
||||
// For topics without schema, create a minimal schema with system fields and _value
|
||||
recordType = schema.RecordTypeBegin().
|
||||
WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
|
||||
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
|
||||
WithField(SW_COLUMN_NAME_VALUE, schema.TypeBytes). // Raw message value
|
||||
RecordTypeEnd()
|
||||
} else {
|
||||
// Add system columns that MQ adds to all records
|
||||
recordType = schema.NewRecordTypeBuilder(recordType).
|
||||
WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
|
||||
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
|
||||
RecordTypeEnd()
|
||||
}
|
||||
|
||||
// Convert to Parquet levels for efficient reading
|
||||
parquetLevels, err := schema.ToParquetLevels(recordType)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create Parquet levels: %v", err)
|
||||
}
|
||||
|
||||
return &ParquetScanner{
|
||||
filerClient: filerClient,
|
||||
chunkCache: chunk_cache.NewChunkCacheInMemory(256), // Same as MQ logstore
|
||||
topic: t,
|
||||
recordSchema: recordType,
|
||||
parquetLevels: parquetLevels,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ScanOptions configure how the scanner reads data.
type ScanOptions struct {
	// Time range filtering (Unix nanoseconds). Zero values disable the
	// corresponding bound.
	StartTimeNs int64
	StopTimeNs  int64

	// Column projection - if empty, select all columns
	Columns []string

	// Row limit - 0 means no limit
	Limit int

	// Predicate for WHERE clause filtering; nil means no filtering.
	// It receives the full record including system columns.
	Predicate func(*schema_pb.RecordValue) bool
}
|
||||
|
||||
// ScanResult represents a single scanned record.
type ScanResult struct {
	Values    map[string]*schema_pb.Value // Column name -> value
	Timestamp int64                       // Message timestamp (_ts_ns)
	Key       []byte                      // Message key (_key)
}
|
||||
|
||||
// Scan reads records from the topic's Parquet files
|
||||
// Assumptions:
|
||||
// 1. Scans all partitions of the topic
|
||||
// 2. Applies time filtering at Parquet level for efficiency
|
||||
// 3. Applies predicates and projections after reading
|
||||
func (ps *ParquetScanner) Scan(ctx context.Context, options ScanOptions) ([]ScanResult, error) {
|
||||
var results []ScanResult
|
||||
|
||||
// Get all partitions for this topic
|
||||
// TODO: Implement proper partition discovery
|
||||
// For now, assume partition 0 exists
|
||||
partitions := []topic.Partition{{RangeStart: 0, RangeStop: 1000}}
|
||||
|
||||
for _, partition := range partitions {
|
||||
partitionResults, err := ps.scanPartition(ctx, partition, options)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan partition %v: %v", partition, err)
|
||||
}
|
||||
|
||||
results = append(results, partitionResults...)
|
||||
|
||||
// Apply global limit across all partitions
|
||||
if options.Limit > 0 && len(results) >= options.Limit {
|
||||
results = results[:options.Limit]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// scanPartition scans a specific topic partition
|
||||
func (ps *ParquetScanner) scanPartition(ctx context.Context, partition topic.Partition, options ScanOptions) ([]ScanResult, error) {
|
||||
// partitionDir := topic.PartitionDir(ps.topic, partition) // TODO: Use for actual file listing
|
||||
|
||||
var results []ScanResult
|
||||
|
||||
// List Parquet files in partition directory
|
||||
// TODO: Implement proper file listing with date range filtering
|
||||
// For now, this is a placeholder that would list actual Parquet files
|
||||
|
||||
// Simulate file processing - in real implementation, this would:
|
||||
// 1. List files in partitionDir via filerClient
|
||||
// 2. Filter files by date range if time filtering is enabled
|
||||
// 3. Process each Parquet file in chronological order
|
||||
|
||||
// Placeholder: Create sample data for testing
|
||||
if len(results) == 0 {
|
||||
// Generate sample data for demonstration
|
||||
sampleData := ps.generateSampleData(options)
|
||||
results = append(results, sampleData...)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// generateSampleData creates sample data for testing when no real Parquet files exist
|
||||
func (ps *ParquetScanner) generateSampleData(options ScanOptions) []ScanResult {
|
||||
now := time.Now().UnixNano()
|
||||
|
||||
sampleData := []ScanResult{
|
||||
{
|
||||
Values: map[string]*schema_pb.Value{
|
||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
|
||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "login"}},
|
||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1"}`}},
|
||||
},
|
||||
Timestamp: now - 3600000000000, // 1 hour ago
|
||||
Key: []byte("user-1001"),
|
||||
},
|
||||
{
|
||||
Values: map[string]*schema_pb.Value{
|
||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
|
||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "page_view"}},
|
||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"page": "/dashboard"}`}},
|
||||
},
|
||||
Timestamp: now - 1800000000000, // 30 minutes ago
|
||||
Key: []byte("user-1002"),
|
||||
},
|
||||
{
|
||||
Values: map[string]*schema_pb.Value{
|
||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
|
||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "logout"}},
|
||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"session_duration": 3600}`}},
|
||||
},
|
||||
Timestamp: now - 900000000000, // 15 minutes ago
|
||||
Key: []byte("user-1001"),
|
||||
},
|
||||
}
|
||||
|
||||
// Apply predicate filtering if specified
|
||||
if options.Predicate != nil {
|
||||
var filtered []ScanResult
|
||||
for _, result := range sampleData {
|
||||
// Convert to RecordValue for predicate testing
|
||||
recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
|
||||
for k, v := range result.Values {
|
||||
recordValue.Fields[k] = v
|
||||
}
|
||||
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
||||
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
||||
|
||||
if options.Predicate(recordValue) {
|
||||
filtered = append(filtered, result)
|
||||
}
|
||||
}
|
||||
sampleData = filtered
|
||||
}
|
||||
|
||||
// Apply limit
|
||||
if options.Limit > 0 && len(sampleData) > options.Limit {
|
||||
sampleData = sampleData[:options.Limit]
|
||||
}
|
||||
|
||||
return sampleData
|
||||
}
|
||||
|
||||
// ConvertToSQLResult converts ScanResults to SQL query results
|
||||
func (ps *ParquetScanner) ConvertToSQLResult(results []ScanResult, columns []string) *QueryResult {
|
||||
if len(results) == 0 {
|
||||
return &QueryResult{
|
||||
Columns: columns,
|
||||
Rows: [][]sqltypes.Value{},
|
||||
}
|
||||
}
|
||||
|
||||
// Determine columns if not specified
|
||||
if len(columns) == 0 {
|
||||
columnSet := make(map[string]bool)
|
||||
for _, result := range results {
|
||||
for columnName := range result.Values {
|
||||
columnSet[columnName] = true
|
||||
}
|
||||
}
|
||||
|
||||
columns = make([]string, 0, len(columnSet))
|
||||
for columnName := range columnSet {
|
||||
columns = append(columns, columnName)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to SQL rows
|
||||
rows := make([][]sqltypes.Value, len(results))
|
||||
for i, result := range results {
|
||||
row := make([]sqltypes.Value, len(columns))
|
||||
for j, columnName := range columns {
|
||||
if value, exists := result.Values[columnName]; exists {
|
||||
row[j] = convertSchemaValueToSQL(value)
|
||||
} else {
|
||||
row[j] = sqltypes.NULL
|
||||
}
|
||||
}
|
||||
rows[i] = row
|
||||
}
|
||||
|
||||
return &QueryResult{
|
||||
Columns: columns,
|
||||
Rows: rows,
|
||||
}
|
||||
}
|
||||
|
||||
// convertSchemaValueToSQL converts schema_pb.Value to sqltypes.Value
|
||||
func convertSchemaValueToSQL(value *schema_pb.Value) sqltypes.Value {
|
||||
if value == nil {
|
||||
|
||||
Reference in New Issue
Block a user