Worker sets its working directory (#8461)
* set working directory
* consolidate to worker directory
* working directory
* correct directory name
* refactoring to use wildcard matcher
* simplify
* cleaning ec working directory
* fix reference
* clean
* adjust test
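
The series replaces the exact-match collection filter with the shared wildcard matcher. A minimal sketch of the new call pattern, assuming CompileWildcardMatchers accepts the raw comma-separated filter string and returns a slice of compiled patterns, and MatchesAnyWildcard reports whether a name matches any of them; the filter value "logs,tmp-*" is hypothetical:

package main

import (
	"fmt"

	"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
)

func main() {
	// Hypothetical filter: the exact collection "logs" plus anything prefixed "tmp-".
	patterns := wildcard.CompileWildcardMatchers("logs,tmp-*")
	for _, collection := range []string{"logs", "tmp-2024", "photos"} {
		// Mirrors the detection-trace check in the diff: an empty filter allows everything.
		if len(patterns) == 0 || wildcard.MatchesAnyWildcard(patterns, collection) {
			fmt.Println("eligible:", collection)
		}
	}
}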
@@ -12,6 +12,7 @@ import (
	"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
	ecstorage "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
+	"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
	erasurecodingtask "github.com/seaweedfs/seaweedfs/weed/worker/tasks/erasure_coding"
	workertypes "github.com/seaweedfs/seaweedfs/weed/worker/types"
	"google.golang.org/grpc"
@@ -280,7 +281,7 @@ func emitErasureCodingDetectionDecisionTrace(

	quietThreshold := time.Duration(taskConfig.QuietForSeconds) * time.Second
	minSizeBytes := uint64(taskConfig.MinSizeMB) * 1024 * 1024
-	allowedCollections := erasurecodingtask.ParseCollectionFilter(taskConfig.CollectionFilter)
+	allowedCollections := wildcard.CompileWildcardMatchers(taskConfig.CollectionFilter)

	volumeGroups := make(map[uint32][]*workertypes.VolumeHealthMetrics)
	for _, metric := range metrics {
@@ -318,7 +319,7 @@ func emitErasureCodingDetectionDecisionTrace(
			skippedTooSmall++
			continue
		}
-		if len(allowedCollections) > 0 && !allowedCollections[metric.Collection] {
+		if len(allowedCollections) > 0 && !wildcard.MatchesAnyWildcard(allowedCollections, metric.Collection) {
			skippedCollectionFilter++
			continue
		}
@@ -569,9 +570,7 @@ func (h *ErasureCodingHandler) collectVolumeMetrics(
	masterAddresses []string,
	collectionFilter string,
) ([]*workertypes.VolumeHealthMetrics, *topology.ActiveTopology, error) {
-	// Reuse the same master topology fetch/build flow used by the vacuum handler.
-	helper := &VacuumHandler{grpcDialOption: h.grpcDialOption}
-	return helper.collectVolumeMetrics(ctx, masterAddresses, collectionFilter)
+	return collectVolumeMetricsFromMasters(ctx, masterAddresses, collectionFilter, h.grpcDialOption)
}

func deriveErasureCodingWorkerConfig(values map[string]*plugin_pb.ConfigValue) *erasureCodingWorkerConfig {
@@ -906,7 +905,7 @@ func assignECShardIDs(totalShards int, targetCount int) [][]uint32 {
func defaultErasureCodingWorkingDir(baseWorkingDir string) string {
	dir := strings.TrimSpace(baseWorkingDir)
	if dir == "" {
-		return filepath.Join(".", "seaweedfs-ec")
+		return filepath.Join(".", "erasure_coding")
	}
-	return filepath.Join(dir, "seaweedfs-ec")
+	return filepath.Join(dir, "erasure_coding")
}
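With the rename above, erasure-coding scratch files land in an erasure_coding subdirectory of the worker's configured base directory, or of the current directory when none is set. A self-contained mirror of the helper, for illustration under those assumptions:

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// workingDir mirrors defaultErasureCodingWorkingDir from the hunk above.
func workingDir(base string) string {
	dir := strings.TrimSpace(base)
	if dir == "" {
		return filepath.Join(".", "erasure_coding")
	}
	return filepath.Join(dir, "erasure_coding")
}

func main() {
	fmt.Println(workingDir(""))             // erasure_coding
	fmt.Println(workingDir("/data/worker")) // /data/worker/erasure_coding
}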
@@ -3,15 +3,12 @@ package pluginworker
import (
	"context"
	"fmt"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/admin/topology"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
	vacuumtask "github.com/seaweedfs/seaweedfs/weed/worker/tasks/vacuum"
@@ -492,62 +489,7 @@ func (h *VacuumHandler) collectVolumeMetrics(
	masterAddresses []string,
	collectionFilter string,
) ([]*workertypes.VolumeHealthMetrics, *topology.ActiveTopology, error) {
-	if h.grpcDialOption == nil {
-		return nil, nil, fmt.Errorf("grpc dial option is not configured")
-	}
-	if len(masterAddresses) == 0 {
-		return nil, nil, fmt.Errorf("no master addresses provided in cluster context")
-	}
-
-	for _, masterAddress := range masterAddresses {
-		response, err := h.fetchVolumeList(ctx, masterAddress)
-		if err != nil {
-			glog.Warningf("Plugin worker failed master volume list at %s: %v", masterAddress, err)
-			continue
-		}
-
-		metrics, activeTopology, buildErr := buildVolumeMetrics(response, collectionFilter)
-		if buildErr != nil {
-			glog.Warningf("Plugin worker failed to build metrics from master %s: %v", masterAddress, buildErr)
-			continue
-		}
-		return metrics, activeTopology, nil
-	}
-
-	return nil, nil, fmt.Errorf("failed to load topology from all provided masters")
-}
-
-func (h *VacuumHandler) fetchVolumeList(ctx context.Context, address string) (*master_pb.VolumeListResponse, error) {
-	var lastErr error
-	for _, candidate := range masterAddressCandidates(address) {
-		if ctx.Err() != nil {
-			return nil, ctx.Err()
-		}
-
-		dialCtx, cancelDial := context.WithTimeout(ctx, 5*time.Second)
-		conn, err := pb.GrpcDial(dialCtx, candidate, false, h.grpcDialOption)
-		cancelDial()
-		if err != nil {
-			lastErr = err
-			continue
-		}
-
-		client := master_pb.NewSeaweedClient(conn)
-		callCtx, cancelCall := context.WithTimeout(ctx, 10*time.Second)
-		response, callErr := client.VolumeList(callCtx, &master_pb.VolumeListRequest{})
-		cancelCall()
-		_ = conn.Close()
-
-		if callErr == nil {
-			return response, nil
-		}
-		lastErr = callErr
-	}
-
-	if lastErr == nil {
-		lastErr = fmt.Errorf("no valid master address candidate")
-	}
-	return nil, lastErr
+	return collectVolumeMetricsFromMasters(ctx, masterAddresses, collectionFilter, h.grpcDialOption)
}

func deriveVacuumConfig(values map[string]*plugin_pb.ConfigValue) *vacuumtask.Config {
@@ -558,74 +500,6 @@ func deriveVacuumConfig(values map[string]*plugin_pb.ConfigValue) *vacuumtask.Co
	return config
}

-func buildVolumeMetrics(
-	response *master_pb.VolumeListResponse,
-	collectionFilter string,
-) ([]*workertypes.VolumeHealthMetrics, *topology.ActiveTopology, error) {
-	if response == nil || response.TopologyInfo == nil {
-		return nil, nil, fmt.Errorf("volume list response has no topology info")
-	}
-
-	activeTopology := topology.NewActiveTopology(10)
-	if err := activeTopology.UpdateTopology(response.TopologyInfo); err != nil {
-		return nil, nil, err
-	}
-
-	filter := strings.TrimSpace(collectionFilter)
-	volumeSizeLimitBytes := uint64(response.VolumeSizeLimitMb) * 1024 * 1024
-	now := time.Now()
-	metrics := make([]*workertypes.VolumeHealthMetrics, 0, 256)
-
-	for _, dc := range response.TopologyInfo.DataCenterInfos {
-		for _, rack := range dc.RackInfos {
-			for _, node := range rack.DataNodeInfos {
-				for diskType, diskInfo := range node.DiskInfos {
-					for _, volume := range diskInfo.VolumeInfos {
-						if filter != "" && volume.Collection != filter {
-							continue
-						}
-
-						metric := &workertypes.VolumeHealthMetrics{
-							VolumeID:         volume.Id,
-							Server:           node.Id,
-							ServerAddress:    string(pb.NewServerAddressFromDataNode(node)),
-							DiskType:         diskType,
-							DiskId:           volume.DiskId,
-							DataCenter:       dc.Id,
-							Rack:             rack.Id,
-							Collection:       volume.Collection,
-							Size:             volume.Size,
-							DeletedBytes:     volume.DeletedByteCount,
-							LastModified:     time.Unix(volume.ModifiedAtSecond, 0),
-							ReplicaCount:     1,
-							ExpectedReplicas: int(volume.ReplicaPlacement),
-							IsReadOnly:       volume.ReadOnly,
-						}
-						if metric.Size > 0 {
-							metric.GarbageRatio = float64(metric.DeletedBytes) / float64(metric.Size)
-						}
-						if volumeSizeLimitBytes > 0 {
-							metric.FullnessRatio = float64(metric.Size) / float64(volumeSizeLimitBytes)
-						}
-						metric.Age = now.Sub(metric.LastModified)
-						metrics = append(metrics, metric)
-					}
-				}
-			}
-		}
-	}
-
-	replicaCounts := make(map[uint32]int)
-	for _, metric := range metrics {
-		replicaCounts[metric.VolumeID]++
-	}
-	for _, metric := range metrics {
-		metric.ReplicaCount = replicaCounts[metric.VolumeID]
-	}
-
-	return metrics, activeTopology, nil
-}
-
func buildVacuumProposal(result *workertypes.TaskDetectionResult) (*plugin_pb.JobProposal, error) {
	if result == nil {
		return nil, fmt.Errorf("task detection result is nil")
@@ -834,25 +708,6 @@ func mapTaskPriority(priority workertypes.TaskPriority) plugin_pb.JobPriority {
	}
}

-func masterAddressCandidates(address string) []string {
-	trimmed := strings.TrimSpace(address)
-	if trimmed == "" {
-		return nil
-	}
-	candidateSet := map[string]struct{}{
-		trimmed: {},
-	}
-	converted := pb.ServerToGrpcAddress(trimmed)
-	candidateSet[converted] = struct{}{}
-
-	candidates := make([]string, 0, len(candidateSet))
-	for candidate := range candidateSet {
-		candidates = append(candidates, candidate)
-	}
-	sort.Strings(candidates)
-	return candidates
-}
-
func shouldSkipDetectionByInterval(lastSuccessfulRun *timestamppb.Timestamp, minIntervalSeconds int) bool {
	if lastSuccessfulRun == nil || minIntervalSeconds <= 0 {
		return false
weed/plugin/worker/volume_metrics.go (new file, 168 lines)
@@ -0,0 +1,168 @@
package pluginworker

import (
	"context"
	"fmt"
	"sort"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/admin/topology"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
	workertypes "github.com/seaweedfs/seaweedfs/weed/worker/types"
	"google.golang.org/grpc"
)

// collectVolumeMetricsFromMasters tries each master in turn and returns the
// first successfully built metrics set together with the active topology.
func collectVolumeMetricsFromMasters(
	ctx context.Context,
	masterAddresses []string,
	collectionFilter string,
	grpcDialOption grpc.DialOption,
) ([]*workertypes.VolumeHealthMetrics, *topology.ActiveTopology, error) {
	if grpcDialOption == nil {
		return nil, nil, fmt.Errorf("grpc dial option is not configured")
	}
	if len(masterAddresses) == 0 {
		return nil, nil, fmt.Errorf("no master addresses provided in cluster context")
	}

	for _, masterAddress := range masterAddresses {
		response, err := fetchVolumeList(ctx, masterAddress, grpcDialOption)
		if err != nil {
			glog.Warningf("Plugin worker failed master volume list at %s: %v", masterAddress, err)
			continue
		}

		metrics, activeTopology, buildErr := buildVolumeMetrics(response, collectionFilter)
		if buildErr != nil {
			glog.Warningf("Plugin worker failed to build metrics from master %s: %v", masterAddress, buildErr)
			continue
		}
		return metrics, activeTopology, nil
	}

	return nil, nil, fmt.Errorf("failed to load topology from all provided masters")
}

// fetchVolumeList dials each candidate form of the master address with
// bounded dial and call timeouts, returning the first VolumeList response.
func fetchVolumeList(ctx context.Context, address string, grpcDialOption grpc.DialOption) (*master_pb.VolumeListResponse, error) {
	var lastErr error
	for _, candidate := range masterAddressCandidates(address) {
		if ctx.Err() != nil {
			return nil, ctx.Err()
		}

		dialCtx, cancelDial := context.WithTimeout(ctx, 5*time.Second)
		conn, err := pb.GrpcDial(dialCtx, candidate, false, grpcDialOption)
		cancelDial()
		if err != nil {
			lastErr = err
			continue
		}

		client := master_pb.NewSeaweedClient(conn)
		callCtx, cancelCall := context.WithTimeout(ctx, 10*time.Second)
		response, callErr := client.VolumeList(callCtx, &master_pb.VolumeListRequest{})
		cancelCall()
		_ = conn.Close()

		if callErr == nil {
			return response, nil
		}
		lastErr = callErr
	}

	if lastErr == nil {
		lastErr = fmt.Errorf("no valid master address candidate")
	}
	return nil, lastErr
}

// buildVolumeMetrics walks the topology from a VolumeList response and
// produces per-volume health metrics, honoring the wildcard collection filter.
func buildVolumeMetrics(
	response *master_pb.VolumeListResponse,
	collectionFilter string,
) ([]*workertypes.VolumeHealthMetrics, *topology.ActiveTopology, error) {
	if response == nil || response.TopologyInfo == nil {
		return nil, nil, fmt.Errorf("volume list response has no topology info")
	}

	activeTopology := topology.NewActiveTopology(10)
	if err := activeTopology.UpdateTopology(response.TopologyInfo); err != nil {
		return nil, nil, err
	}

	patterns := wildcard.CompileWildcardMatchers(collectionFilter)
	volumeSizeLimitBytes := uint64(response.VolumeSizeLimitMb) * 1024 * 1024
	now := time.Now()
	metrics := make([]*workertypes.VolumeHealthMetrics, 0, 256)

	for _, dc := range response.TopologyInfo.DataCenterInfos {
		for _, rack := range dc.RackInfos {
			for _, node := range rack.DataNodeInfos {
				for diskType, diskInfo := range node.DiskInfos {
					for _, volume := range diskInfo.VolumeInfos {
						if !wildcard.MatchesAnyWildcard(patterns, volume.Collection) {
							continue
						}

						metric := &workertypes.VolumeHealthMetrics{
							VolumeID:         volume.Id,
							Server:           node.Id,
							ServerAddress:    string(pb.NewServerAddressFromDataNode(node)),
							DiskType:         diskType,
							DiskId:           volume.DiskId,
							DataCenter:       dc.Id,
							Rack:             rack.Id,
							Collection:       volume.Collection,
							Size:             volume.Size,
							DeletedBytes:     volume.DeletedByteCount,
							LastModified:     time.Unix(volume.ModifiedAtSecond, 0),
							ReplicaCount:     1,
							ExpectedReplicas: int(volume.ReplicaPlacement),
							IsReadOnly:       volume.ReadOnly,
						}
						if metric.Size > 0 {
							metric.GarbageRatio = float64(metric.DeletedBytes) / float64(metric.Size)
						}
						if volumeSizeLimitBytes > 0 {
							metric.FullnessRatio = float64(metric.Size) / float64(volumeSizeLimitBytes)
						}
						metric.Age = now.Sub(metric.LastModified)
						metrics = append(metrics, metric)
					}
				}
			}
		}
	}

	// Second pass: count replicas of each volume id seen across all nodes.
	replicaCounts := make(map[uint32]int)
	for _, metric := range metrics {
		replicaCounts[metric.VolumeID]++
	}
	for _, metric := range metrics {
		metric.ReplicaCount = replicaCounts[metric.VolumeID]
	}

	return metrics, activeTopology, nil
}

// masterAddressCandidates returns the deduplicated, sorted address forms to
// try: the address as given plus its gRPC-port conversion.
func masterAddressCandidates(address string) []string {
	trimmed := strings.TrimSpace(address)
	if trimmed == "" {
		return nil
	}
	candidateSet := map[string]struct{}{
		trimmed: {},
	}
	converted := pb.ServerToGrpcAddress(trimmed)
	candidateSet[converted] = struct{}{}

	candidates := make([]string, 0, len(candidateSet))
	for candidate := range candidateSet {
		candidates = append(candidates, candidate)
	}
	sort.Strings(candidates)
	return candidates
}
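
Both the vacuum and erasure-coding handlers now delegate to collectVolumeMetricsFromMasters above. A hedged usage sketch; the master address, filter string, and insecure dial option are placeholders, and the context, grpc, insecure (google.golang.org/grpc/credentials/insecure), and glog imports are assumed:

func exampleCollectVolumeMetrics() { // hypothetical caller, not part of this commit
	metrics, activeTopology, err := collectVolumeMetricsFromMasters(
		context.Background(),
		[]string{"localhost:9333"}, // placeholder master address
		"logs,tmp-*",               // placeholder wildcard collection filter
		grpc.WithTransportCredentials(insecure.NewCredentials()), // placeholder dial option
	)
	if err != nil {
		glog.Errorf("collect volume metrics: %v", err)
		return
	}
	glog.Infof("collected %d volume metrics (topology loaded: %v)", len(metrics), activeTopology != nil)
}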