// seaweedFS/weed/storage/erasure_coding/distribution/config.go

// Package distribution provides EC shard distribution algorithms with configurable EC ratios.
package distribution

import (
	"fmt"

	"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
)

// ECConfig holds erasure coding configuration parameters.
// This replaces hard-coded constants like DataShardsCount=10, ParityShardsCount=4.
//
// Shard IDs are laid out with data shards first: IDs 0..DataShards-1 are data
// shards and IDs DataShards..DataShards+ParityShards-1 are parity shards.
// The struct itself performs no validation; NewECConfig returns a checked value.
type ECConfig struct {
	DataShards   int // Number of data shards (e.g., 10)
	ParityShards int // Number of parity shards (e.g., 4)
}

// DefaultECConfig returns the standard EC layout: 10 data shards plus
// 4 parity shards.
func DefaultECConfig() ECConfig {
	const (
		defaultDataShards   = 10
		defaultParityShards = 4
	)

	var cfg ECConfig
	cfg.DataShards = defaultDataShards
	cfg.ParityShards = defaultParityShards
	return cfg
}

// NewECConfig creates a validated EC configuration.
//
// Both dataShards and parityShards must be positive, and their sum must not
// exceed 32. On any violation the zero ECConfig and a descriptive error are
// returned.
func NewECConfig(dataShards, parityShards int) (ECConfig, error) {
	const maxTotalShards = 32

	if dataShards <= 0 {
		return ECConfig{}, fmt.Errorf("dataShards must be positive, got %d", dataShards)
	}
	if parityShards <= 0 {
		return ECConfig{}, fmt.Errorf("parityShards must be positive, got %d", parityShards)
	}

	// Both operands are known to be positive here, so summing them as uint64
	// cannot wrap around. Adding them as int could overflow to a negative
	// value for huge inputs and silently slip past the limit check.
	total := uint64(dataShards) + uint64(parityShards)
	if total > maxTotalShards {
		return ECConfig{}, fmt.Errorf("total shards (%d+%d=%d) exceeds maximum of %d",
			dataShards, parityShards, total, maxTotalShards)
	}

	return ECConfig{
		DataShards:   dataShards,
		ParityShards: parityShards,
	}, nil
}

// TotalShards reports the combined shard count, i.e. the number of data
// shards plus the number of parity shards.
func (c ECConfig) TotalShards() int {
	total := c.DataShards
	total += c.ParityShards
	return total
}

// MaxTolerableLoss reports how many shards may be lost while the data can
// still be reconstructed. This equals the number of parity shards.
func (c ECConfig) MaxTolerableLoss() int {
	lossBudget := c.ParityShards
	return lossBudget
}

// MinShardsForReconstruction reports the smallest number of shards required
// to rebuild the original data. This equals the number of data shards.
func (c ECConfig) MinShardsForReconstruction() int {
	required := c.DataShards
	return required
}

// String formats the configuration as "D+P (total: T, can lose: L)", where
// D and P are the data and parity shard counts, T is their total and L is
// the maximum tolerable shard loss.
func (c ECConfig) String() string {
	total := c.TotalShards()
	tolerable := c.MaxTolerableLoss()
	return fmt.Sprintf("%d+%d (total: %d, can lose: %d)", c.DataShards, c.ParityShards, total, tolerable)
}

// IsDataShard reports whether shardID falls in the data shard range,
// i.e. 0 through DataShards-1 inclusive.
func (c ECConfig) IsDataShard(shardID int) bool {
	if shardID < 0 {
		return false
	}
	return shardID < c.DataShards
}

// IsParityShard reports whether shardID falls in the parity shard range,
// i.e. DataShards through TotalShards-1 inclusive.
func (c ECConfig) IsParityShard(shardID int) bool {
	if shardID < c.DataShards {
		return false
	}
	return shardID < c.TotalShards()
}

// SortShardsDataFirst returns a new slice holding the same shard IDs as
// shards, partitioned so that data shards come first. The input slice is not
// modified, and the result is always non-nil.
//
// This is a partition rather than a full sort. Data shards keep their
// original relative order at the front. Every other ID (parity shards and any
// ID outside the configured range) is written from the back of the result
// toward the front, so those IDs appear in reverse encounter order.
//
// This is useful for initial placement where data shards should be spread
// out first.
func (c ECConfig) SortShardsDataFirst(shards []int) []int {
	ordered := make([]int, len(shards))

	// front is the next free slot for a data shard; back is the next free
	// slot (moving backwards) for everything else. Each input ID takes exactly
	// one slot, so the two cursors never cross.
	front, back := 0, len(ordered)-1
	for _, id := range shards {
		if c.IsDataShard(id) {
			ordered[front] = id
			front++
			continue
		}
		ordered[back] = id
		back--
	}

	return ordered
}

// SortShardsParityFirst returns a new slice holding the same shard IDs as
// shards, partitioned so that parity shards come first. The input slice is
// not modified, and the result is always non-nil.
//
// This is a partition rather than a full sort. Parity shards keep their
// original relative order at the front. Every other ID (data shards and any
// ID outside the configured range) is written from the back of the result
// toward the front, so those IDs appear in reverse encounter order.
//
// This is useful for rebalancing where we prefer to move parity shards.
func (c ECConfig) SortShardsParityFirst(shards []int) []int {
	ordered := make([]int, len(shards))

	// front is the next free slot for a parity shard; back is the next free
	// slot (moving backwards) for everything else. Each input ID takes exactly
	// one slot, so the two cursors never cross.
	front, back := 0, len(ordered)-1
	for _, id := range shards {
		if c.IsParityShard(id) {
			ordered[front] = id
			front++
			continue
		}
		ordered[back] = id
		back--
	}

	return ordered
}

// ReplicationConfig holds the parsed replication policy.
//
// Each Min* field is one more than the corresponding component of the "XYZ"
// replication setting, so the policy "000" corresponds to 1 data center,
// 1 rack per data center and 1 node per rack.
type ReplicationConfig struct {
	MinDataCenters  int // X+1 from XYZ replication (minimum DCs to use)
	MinRacksPerDC   int // Y+1 from XYZ replication (minimum racks per DC)
	MinNodesPerRack int // Z+1 from XYZ replication (minimum nodes per rack)

	// Original replication string (for logging/debugging)
	Original string
}

// NewReplicationConfig derives a ReplicationConfig from a ReplicaPlacement.
//
// A nil rp yields the "000" policy with every minimum set to 1. Otherwise
// each minimum is the matching ReplicaPlacement count plus one, and Original
// is set to rp.String().
func NewReplicationConfig(rp *super_block.ReplicaPlacement) ReplicationConfig {
	// Start from the fallback used when no placement is supplied.
	cfg := ReplicationConfig{
		MinDataCenters:  1,
		MinRacksPerDC:   1,
		MinNodesPerRack: 1,
		Original:        "000",
	}

	if rp != nil {
		cfg.MinDataCenters = rp.DiffDataCenterCount + 1
		cfg.MinRacksPerDC = rp.DiffRackCount + 1
		cfg.MinNodesPerRack = rp.SameRackCount + 1
		cfg.Original = rp.String()
	}

	return cfg
}

// NewReplicationConfigFromString parses replication with
// super_block.NewReplicaPlacementFromString and converts the result with
// NewReplicationConfig. If parsing fails, the zero ReplicationConfig is
// returned together with the parser's error, unmodified.
func NewReplicationConfigFromString(replication string) (ReplicationConfig, error) {
	placement, err := super_block.NewReplicaPlacementFromString(replication)
	if err != nil {
		var none ReplicationConfig
		return none, err
	}

	cfg := NewReplicationConfig(placement)
	return cfg, nil
}

// TotalPlacementSlots reports the minimum number of unique placement
// locations implied by the replication policy, computed as
// MinDataCenters * MinRacksPerDC * MinNodesPerRack.
func (r ReplicationConfig) TotalPlacementSlots() int {
	slots := r.MinDataCenters
	slots *= r.MinRacksPerDC
	slots *= r.MinNodesPerRack
	return slots
}

// String formats the policy as
// "replication=<Original> (DCs:<n>, Racks/DC:<n>, Nodes/Rack:<n>)".
func (r ReplicationConfig) String() string {
	const layout = "replication=%s (DCs:%d, Racks/DC:%d, Nodes/Rack:%d)"

	dcs, racks, nodes := r.MinDataCenters, r.MinRacksPerDC, r.MinNodesPerRack
	return fmt.Sprintf(layout, r.Original, dcs, racks, nodes)
}