chore: remove ~50k lines of unreachable dead code (#8913)

* chore: remove unreachable dead code across the codebase

Remove ~50,000 lines of unreachable code identified by static analysis.

Major removals:
- weed/filer/redis_lua: entire unused Redis Lua filer store implementation
- weed/wdclient/net2, resource_pool: unused connection/resource pool packages
- weed/plugin/worker/lifecycle: unused lifecycle plugin worker
- weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy,
  multipart IAM, key rotation, and various SSE helper functions
- weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions
- weed/mq/offset: unused SQL storage and migration code
- weed/worker: unused registry, task, and monitoring functions
- weed/query: unused SQL engine, parquet scanner, and type functions
- weed/shell: unused EC proportional rebalance functions
- weed/storage/erasure_coding/distribution: unused distribution analysis functions
- Individual unreachable functions removed from 150+ files across admin,
  credential, filer, iam, kms, mount, mq, operation, pb, s3api, server,
  shell, storage, topology, and util packages

* fix(s3): reset shared memory store in IAM test to prevent flaky failure

TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because
the MemoryStore credential backend is a singleton registered via init().
Earlier tests that create anonymous identities pollute the shared store,
causing LookupAnonymous() to unexpectedly return true.

Fix by calling Reset() on the memory store before the test runs.

* style: run gofmt on changed files

* fix: restore KMS functions used by integration tests

* fix(plugin): prevent panic on send to closed worker session channel

The Plugin.sendToWorker method could panic with "send on closed channel"
when a worker disconnected while a message was being sent. The race was
between streamSession.close() closing the outgoing channel and sendToWorker
writing to it concurrently.

Add a done channel to streamSession that is closed before the outgoing
channel, and check it in sendToWorker's select to safely detect closed
sessions without panicking.
This commit is contained in:
Chris Lu
2026-04-03 16:04:27 -07:00
committed by GitHub
parent 8fad85aed7
commit 995dfc4d5d
264 changed files with 62 additions and 46027 deletions

View File

@@ -1,280 +1,14 @@
package engine
import (
"context"
"fmt"
"math/big"
"time"
"github.com/seaweedfs/seaweedfs/weed/mq/schema"
"github.com/seaweedfs/seaweedfs/weed/mq/topic"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
)
// ParquetScanner scans MQ topic Parquet files for SELECT queries
// Assumptions:
// 1. All MQ messages are stored in Parquet format in topic partitions
// 2. Each partition directory contains dated Parquet files
// 3. System columns (_ts_ns, _key) are added to user schema
// 4. Predicate pushdown is used for efficient scanning
type ParquetScanner struct {
filerClient filer_pb.FilerClient
chunkCache chunk_cache.ChunkCache
topic topic.Topic
recordSchema *schema_pb.RecordType
parquetLevels *schema.ParquetLevels
}
// NewParquetScanner creates a scanner for a specific MQ topic
// Assumption: Topic exists and has Parquet files in partition directories
func NewParquetScanner(filerClient filer_pb.FilerClient, namespace, topicName string) (*ParquetScanner, error) {
// Check if filerClient is available
if filerClient == nil {
return nil, fmt.Errorf("filerClient is required but not available")
}
// Create topic reference
t := topic.Topic{
Namespace: namespace,
Name: topicName,
}
// Read topic configuration to get schema
var topicConf *mq_pb.ConfigureTopicResponse
var err error
if err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
topicConf, err = t.ReadConfFile(client)
return err
}); err != nil {
return nil, fmt.Errorf("failed to read topic config: %v", err)
}
// Build complete schema with system columns - prefer flat schema if available
var recordType *schema_pb.RecordType
if topicConf.GetMessageRecordType() != nil {
// New flat schema format - use directly
recordType = topicConf.GetMessageRecordType()
}
if recordType == nil || len(recordType.Fields) == 0 {
// For topics without schema, create a minimal schema with system fields and _value
recordType = schema.RecordTypeBegin().
WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
WithField(SW_COLUMN_NAME_VALUE, schema.TypeBytes). // Raw message value
RecordTypeEnd()
} else {
// Add system columns that MQ adds to all records
recordType = schema.NewRecordTypeBuilder(recordType).
WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
RecordTypeEnd()
}
// Convert to Parquet levels for efficient reading
parquetLevels, err := schema.ToParquetLevels(recordType)
if err != nil {
return nil, fmt.Errorf("failed to create Parquet levels: %v", err)
}
return &ParquetScanner{
filerClient: filerClient,
chunkCache: chunk_cache.NewChunkCacheInMemory(256), // Same as MQ logstore
topic: t,
recordSchema: recordType,
parquetLevels: parquetLevels,
}, nil
}
// ScanOptions configure how the scanner reads data
type ScanOptions struct {
// Time range filtering (Unix nanoseconds)
StartTimeNs int64
StopTimeNs int64
// Column projection - if empty, select all columns
Columns []string
// Row limit - 0 means no limit
Limit int
// Predicate for WHERE clause filtering
Predicate func(*schema_pb.RecordValue) bool
}
// ScanResult represents a single scanned record
type ScanResult struct {
Values map[string]*schema_pb.Value // Column name -> value
Timestamp int64 // Message timestamp (_ts_ns)
Key []byte // Message key (_key)
}
// Scan reads records from the topic's Parquet files
// Assumptions:
// 1. Scans all partitions of the topic
// 2. Applies time filtering at Parquet level for efficiency
// 3. Applies predicates and projections after reading
func (ps *ParquetScanner) Scan(ctx context.Context, options ScanOptions) ([]ScanResult, error) {
var results []ScanResult
// Get all partitions for this topic
// TODO: Implement proper partition discovery
// For now, assume partition 0 exists
partitions := []topic.Partition{{RangeStart: 0, RangeStop: 1000}}
for _, partition := range partitions {
partitionResults, err := ps.scanPartition(ctx, partition, options)
if err != nil {
return nil, fmt.Errorf("failed to scan partition %v: %v", partition, err)
}
results = append(results, partitionResults...)
// Apply global limit across all partitions
if options.Limit > 0 && len(results) >= options.Limit {
results = results[:options.Limit]
break
}
}
return results, nil
}
// scanPartition scans a specific topic partition
func (ps *ParquetScanner) scanPartition(ctx context.Context, partition topic.Partition, options ScanOptions) ([]ScanResult, error) {
// partitionDir := topic.PartitionDir(ps.topic, partition) // TODO: Use for actual file listing
var results []ScanResult
// List Parquet files in partition directory
// TODO: Implement proper file listing with date range filtering
// For now, this is a placeholder that would list actual Parquet files
// Simulate file processing - in real implementation, this would:
// 1. List files in partitionDir via filerClient
// 2. Filter files by date range if time filtering is enabled
// 3. Process each Parquet file in chronological order
// Placeholder: Create sample data for testing
if len(results) == 0 {
// Generate sample data for demonstration
sampleData := ps.generateSampleData(options)
results = append(results, sampleData...)
}
return results, nil
}
// generateSampleData creates sample data for testing when no real Parquet files exist
func (ps *ParquetScanner) generateSampleData(options ScanOptions) []ScanResult {
now := time.Now().UnixNano()
sampleData := []ScanResult{
{
Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "login"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1"}`}},
},
Timestamp: now - 3600000000000, // 1 hour ago
Key: []byte("user-1001"),
},
{
Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "page_view"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"page": "/dashboard"}`}},
},
Timestamp: now - 1800000000000, // 30 minutes ago
Key: []byte("user-1002"),
},
{
Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "logout"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"session_duration": 3600}`}},
},
Timestamp: now - 900000000000, // 15 minutes ago
Key: []byte("user-1001"),
},
}
// Apply predicate filtering if specified
if options.Predicate != nil {
var filtered []ScanResult
for _, result := range sampleData {
// Convert to RecordValue for predicate testing
recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
for k, v := range result.Values {
recordValue.Fields[k] = v
}
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
if options.Predicate(recordValue) {
filtered = append(filtered, result)
}
}
sampleData = filtered
}
// Apply limit
if options.Limit > 0 && len(sampleData) > options.Limit {
sampleData = sampleData[:options.Limit]
}
return sampleData
}
// ConvertToSQLResult converts ScanResults to SQL query results
func (ps *ParquetScanner) ConvertToSQLResult(results []ScanResult, columns []string) *QueryResult {
if len(results) == 0 {
return &QueryResult{
Columns: columns,
Rows: [][]sqltypes.Value{},
}
}
// Determine columns if not specified
if len(columns) == 0 {
columnSet := make(map[string]bool)
for _, result := range results {
for columnName := range result.Values {
columnSet[columnName] = true
}
}
columns = make([]string, 0, len(columnSet))
for columnName := range columnSet {
columns = append(columns, columnName)
}
}
// Convert to SQL rows
rows := make([][]sqltypes.Value, len(results))
for i, result := range results {
row := make([]sqltypes.Value, len(columns))
for j, columnName := range columns {
if value, exists := result.Values[columnName]; exists {
row[j] = convertSchemaValueToSQL(value)
} else {
row[j] = sqltypes.NULL
}
}
rows[i] = row
}
return &QueryResult{
Columns: columns,
Rows: rows,
}
}
// convertSchemaValueToSQL converts schema_pb.Value to sqltypes.Value
func convertSchemaValueToSQL(value *schema_pb.Value) sqltypes.Value {
if value == nil {