Files
seaweedFS/weed/pb/grpc_client_server.go
Chris Lu 995dfc4d5d chore: remove ~50k lines of unreachable dead code (#8913)
* chore: remove unreachable dead code across the codebase

Remove ~50,000 lines of unreachable code identified by static analysis.

Major removals:
- weed/filer/redis_lua: entire unused Redis Lua filer store implementation
- weed/wdclient/net2, resource_pool: unused connection/resource pool packages
- weed/plugin/worker/lifecycle: unused lifecycle plugin worker
- weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy,
  multipart IAM, key rotation, and various SSE helper functions
- weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions
- weed/mq/offset: unused SQL storage and migration code
- weed/worker: unused registry, task, and monitoring functions
- weed/query: unused SQL engine, parquet scanner, and type functions
- weed/shell: unused EC proportional rebalance functions
- weed/storage/erasure_coding/distribution: unused distribution analysis functions
- Individual unreachable functions removed from 150+ files across admin,
  credential, filer, iam, kms, mount, mq, operation, pb, s3api, server,
  shell, storage, topology, and util packages

* fix(s3): reset shared memory store in IAM test to prevent flaky failure

TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because
the MemoryStore credential backend is a singleton registered via init().
Earlier tests that create anonymous identities pollute the shared store,
causing LookupAnonymous() to unexpectedly return true.

Fix by calling Reset() on the memory store before the test runs.

* style: run gofmt on changed files

* fix: restore KMS functions used by integration tests

* fix(plugin): prevent panic on send to closed worker session channel

The Plugin.sendToWorker method could panic with "send on closed channel"
when a worker disconnected while a message was being sent. The race was
between streamSession.close() closing the outgoing channel and sendToWorker
writing to it concurrently.

Add a done channel to streamSession that is closed before the outgoing
channel, and check it in sendToWorker's select to safely detect closed
sessions without panicking.
2026-04-03 16:04:27 -07:00

447 lines
15 KiB
Go

package pb
import (
"context"
"fmt"
"math/rand/v2"
"net"
"net/http"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/seaweedfs/seaweedfs/weed/util/request_id"
"google.golang.org/grpc/metadata"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/status"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
)
// gRPC message-size and keepalive settings shared by clients and servers in this package.
const (
	// Max_Message_Size caps both send and receive message sizes (used by
	// NewGrpcServer and GrpcDial below).
	Max_Message_Size = 1 << 30 // 1 GB
	// gRPC keepalive settings - must be consistent between client and server
	GrpcKeepAliveTime        = 60 * time.Second // ping interval when no activity
	GrpcKeepAliveTimeout     = 20 * time.Second // ping timeout
	GrpcKeepAliveMinimumTime = 20 * time.Second // minimum interval between client pings (enforcement)
)
var (
	// grpcClients caches one shared gRPC connection per address for
	// non-streaming calls (see getOrCreateConnection / WithGrpcClient).
	// Guarded by grpcClientsLock.
	grpcClients     = make(map[string]*versionedGrpcClient)
	grpcClientsLock sync.Mutex
	// localGrpcSockets maps gRPC port numbers to Unix socket paths.
	// When registered (by mini mode), gRPC clients connect via Unix socket
	// instead of TCP for local services. Guarded by localGrpcSocketsLock.
	localGrpcSockets     = make(map[int]string)
	localGrpcSocketsLock sync.RWMutex
)
// versionedGrpcClient wraps a cached *grpc.ClientConn with a random version
// tag so that cache invalidation (in WithGrpcClient) can verify it is closing
// the same connection that produced the error, not a concurrently refreshed one.
type versionedGrpcClient struct {
	*grpc.ClientConn
	version  int // random tag assigned at creation; compared before invalidating
	errCount int // NOTE(review): never updated in this file — possibly vestigial; confirm before removing
}
func init() {
http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 1024
http.DefaultTransport.(*http.Transport).MaxIdleConns = 1024
}
// RegisterLocalGrpcSocket registers a Unix socket path for a gRPC port.
// When a gRPC client dials an address on this port, it uses the Unix socket.
func RegisterLocalGrpcSocket(grpcPort int, socketPath string) {
	localGrpcSocketsLock.Lock()
	defer localGrpcSocketsLock.Unlock()

	localGrpcSockets[grpcPort] = socketPath
}
// GetLocalGrpcSocket returns the Unix socket path registered for grpcPort,
// or the empty string when no socket is registered.
func GetLocalGrpcSocket(grpcPort int) string {
	localGrpcSocketsLock.RLock()
	defer localGrpcSocketsLock.RUnlock()

	socketPath := localGrpcSockets[grpcPort]
	return socketPath
}
// resolveLocalGrpcSocket extracts the port from a gRPC address and returns
// the Unix socket path registered for it, or "" when the address does not
// parse or no socket is registered.
func resolveLocalGrpcSocket(address string) string {
	if _, portText, splitErr := net.SplitHostPort(address); splitErr == nil {
		if port, parseErr := strconv.Atoi(portText); parseErr == nil {
			return GetLocalGrpcSocket(port)
		}
	}
	return ""
}
// ServeGrpcOnLocalSocket additionally serves grpcServer on the Unix socket
// registered for the given port, if any. Serving happens in a background
// goroutine; the socket file is removed once the server stops.
func ServeGrpcOnLocalSocket(grpcServer *grpc.Server, grpcPort int) {
	socketPath := GetLocalGrpcSocket(grpcPort)
	if len(socketPath) == 0 {
		return
	}
	// A stale socket file left over from a previous run would make Listen fail.
	if removeErr := os.Remove(socketPath); removeErr != nil && !os.IsNotExist(removeErr) {
		glog.Warningf("Failed to remove old gRPC socket %s: %v", socketPath, removeErr)
	}
	unixListener, listenErr := net.Listen("unix", socketPath)
	if listenErr != nil {
		glog.Errorf("Failed to listen on gRPC Unix socket %s: %v", socketPath, listenErr)
		return
	}
	glog.V(0).Infof("gRPC also listening on Unix socket %s", socketPath)
	go func() {
		serveErr := grpcServer.Serve(unixListener)
		if serveErr != nil && serveErr != grpc.ErrServerStopped {
			glog.Errorf("gRPC Unix socket server error on %s: %v", socketPath, serveErr)
		}
		// Best-effort cleanup of the socket file after the server exits.
		os.Remove(socketPath)
	}()
}
// NewGrpcServer constructs a *grpc.Server with this package's standard
// options (keepalive, message-size limits, window sizes, and the request-ID
// interceptor), followed by any non-nil caller-supplied options.
func NewGrpcServer(opts ...grpc.ServerOption) *grpc.Server {
	serverOptions := []grpc.ServerOption{
		grpc.KeepaliveParams(keepalive.ServerParameters{
			Time:    GrpcKeepAliveTime,    // server pings client if no activity for this long
			Timeout: GrpcKeepAliveTimeout, // ping timeout
		}),
		grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
			MinTime:             GrpcKeepAliveMinimumTime, // min time a client should wait before sending a ping
			PermitWithoutStream: true,
		}),
		grpc.MaxRecvMsgSize(Max_Message_Size),
		grpc.MaxSendMsgSize(Max_Message_Size),
		grpc.MaxConcurrentStreams(1000),              // allow more concurrent streams
		grpc.InitialWindowSize(16 * 1024 * 1024),     // 16MB initial window
		grpc.InitialConnWindowSize(16 * 1024 * 1024), // 16MB connection window
		grpc.MaxHeaderListSize(8 * 1024 * 1024),      // 8MB header list limit
		grpc.UnaryInterceptor(requestIDUnaryInterceptor()),
	}
	for _, extraOption := range opts {
		if extraOption == nil {
			continue
		}
		serverOptions = append(serverOptions, extraOption)
	}
	return grpc.NewServer(serverOptions...)
}
// GrpcDial establishes a gRPC connection.
//
// IMPORTANT: This function intentionally uses the deprecated grpc.DialContext
// behavior to preserve the "passthrough" resolver semantics required for
// Kubernetes ndots/search-domain DNS behavior, letting kube DNS suffixes be
// appended by the OS resolver.
//
// Switching to grpc.NewClient (which defaults to the "dns" resolver) would
// break this in environments with ndots:5 and many-dot hostnames.
//
// Safe alternatives if switching to grpc.NewClient:
//  1. Prefix the target with "passthrough:///" (e.g. "passthrough:///my-service:8080").
//     This is the recommended primary migration path.
//  2. Call resolver.SetDefaultScheme("passthrough") exactly once during init().
//     WARNING: this is NOT thread-safe and mutates global resolver state
//     affecting every grpc.NewClient call in the process.
func GrpcDial(ctx context.Context, address string, waitForReady bool, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	var dialOptions []grpc.DialOption

	// If a Unix socket is registered for this address's port (mini mode),
	// dial through it instead of TCP.
	if socketPath := resolveLocalGrpcSocket(address); len(socketPath) > 0 {
		unixDialer := func(dialCtx context.Context, _ string) (net.Conn, error) {
			var dialer net.Dialer
			return dialer.DialContext(dialCtx, "unix", socketPath)
		}
		dialOptions = append(dialOptions, grpc.WithContextDialer(unixDialer))
	}

	dialOptions = append(dialOptions,
		grpc.WithDefaultCallOptions(
			grpc.MaxCallSendMsgSize(Max_Message_Size),
			grpc.MaxCallRecvMsgSize(Max_Message_Size),
			grpc.WaitForReady(waitForReady),
		),
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			Time:    GrpcKeepAliveTime,    // client pings server if no activity for this long
			Timeout: GrpcKeepAliveTimeout, // ping timeout
			// Disable pings when there are no active streams to avoid triggering
			// server enforcement for too-frequent pings from idle clients.
			PermitWithoutStream: false,
		}),
	)
	for _, extraOption := range opts {
		if extraOption != nil {
			dialOptions = append(dialOptions, extraOption)
		}
	}
	return grpc.DialContext(ctx, address, dialOptions...)
}
// getOrCreateConnection returns the cached gRPC connection for address,
// dialing and caching a new one on a miss. The returned client carries a
// random version tag so WithGrpcClient can invalidate stale cache entries
// without closing a concurrently refreshed connection.
func getOrCreateConnection(address string, waitForReady bool, opts ...grpc.DialOption) (*versionedGrpcClient, error) {
	grpcClientsLock.Lock()
	defer grpcClientsLock.Unlock()

	if existingConnection, found := grpcClients[address]; found {
		glog.V(4).Infof("gRPC cache hit for %s (version %d)", address, existingConnection.version)
		return existingConnection, nil
	}

	glog.V(2).Infof("Creating new gRPC connection to %s", address)
	grpcConnection, err := GrpcDial(context.Background(), address, waitForReady, opts...)
	if err != nil {
		// %w (was %v) so callers can inspect the dial error with errors.Is/errors.As.
		return nil, fmt.Errorf("fail to dial %s: %w", address, err)
	}

	vgc := &versionedGrpcClient{
		ClientConn: grpcConnection,
		version:    rand.Int(),
		errCount:   0,
	}
	grpcClients[address] = vgc
	glog.V(2).Infof("New gRPC connection established to %s (version %d)", address, vgc.version)
	return vgc, nil
}
// requestIDUnaryInterceptor returns a unary server interceptor that ensures
// every call has a request ID: it reuses the incoming request-ID metadata
// header when present and otherwise generates a fresh UUID. The ID is stored
// in the handler context, appended to outgoing metadata so downstream gRPC
// calls propagate it automatically, and echoed back in the response trailer.
func requestIDUnaryInterceptor() grpc.UnaryServerInterceptor {
	return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
		reqID := ""
		if incomingMd, ok := metadata.FromIncomingContext(ctx); ok {
			if values := incomingMd.Get(request_id.AmzRequestIDHeader); len(values) > 0 {
				reqID = values[0]
			}
		}
		if len(reqID) == 0 {
			reqID = uuid.New().String()
		}
		// Make the ID available to the handler itself.
		ctx = request_id.Set(ctx, reqID)
		// Propagate to downstream gRPC calls made from the handler.
		ctx = metadata.AppendToOutgoingContext(ctx, request_id.AmzRequestIDHeader, reqID)
		// Echo the ID back to the caller in the response trailer.
		grpc.SetTrailer(ctx, metadata.Pairs(request_id.AmzRequestIDHeader, reqID))
		return handler(ctx, req)
	}
}
// shouldInvalidateConnection reports whether err suggests the cached
// connection to the peer is broken and should be dropped from the cache.
func shouldInvalidateConnection(err error) bool {
	if err == nil {
		return false
	}
	// Prefer structured gRPC status codes (more reliable than strings).
	if s, ok := status.FromError(err); ok {
		switch s.Code() {
		case codes.Unavailable, codes.Canceled, codes.DeadlineExceeded, codes.Aborted, codes.Internal:
			return true
		}
	}
	// Some transport-level failures surface without a meaningful gRPC code;
	// fall back to matching well-known error substrings.
	lowered := strings.ToLower(err.Error())
	for _, needle := range []string{
		"transport",
		"connection closed",
		"dns",
		"connection refused",
		"no route to host",
		"network is unreachable",
		"connection reset",
	} {
		if strings.Contains(lowered, needle) {
			return true
		}
	}
	return false
}
// WithGrpcClient runs fn with a gRPC connection to address.
//
// In streamingMode a dedicated connection is dialed and closed when fn
// returns, with the optional client signature attached as "sw-client-id"
// metadata. Otherwise a cached connection is reused; if fn fails with an
// error that indicates a broken connection (shouldInvalidateConnection),
// the cached entry is closed and removed — but only if its version still
// matches, so a concurrently refreshed connection is never closed by mistake.
func WithGrpcClient(streamingMode bool, signature int32, fn func(*grpc.ClientConn) error, address string, waitForReady bool, opts ...grpc.DialOption) error {
	if streamingMode {
		ctx := context.Background()
		if signature != 0 {
			// AppendToOutgoingContext avoids allocating a fresh metadata map.
			ctx = metadata.AppendToOutgoingContext(ctx, "sw-client-id", fmt.Sprintf("%d", signature))
		}
		grpcConnection, err := GrpcDial(ctx, address, waitForReady, opts...)
		if err != nil {
			// %w (was %v) so callers can inspect the dial error with errors.Is/errors.As.
			return fmt.Errorf("fail to dial %s: %w", address, err)
		}
		defer grpcConnection.Close()
		return fn(grpcConnection)
	}

	vgc, err := getOrCreateConnection(address, waitForReady, opts...)
	if err != nil {
		return fmt.Errorf("getOrCreateConnection %s: %w", address, err)
	}
	executionErr := fn(vgc.ClientConn)
	if executionErr != nil && shouldInvalidateConnection(executionErr) {
		grpcClientsLock.Lock()
		// Version check: only evict if the cache still holds the same connection.
		if cached, ok := grpcClients[address]; ok && cached.version == vgc.version {
			glog.V(1).Infof("Removing cached gRPC connection to %s due to error: %v", address, executionErr)
			vgc.Close()
			delete(grpcClients, address)
		}
		grpcClientsLock.Unlock()
	}
	return executionErr
}
// hostAndPort splits address into host and numeric port. It accepts both
// "host:port" and the combined "host:port.grpcPort" form; in the latter case
// the first (non-gRPC) port is returned.
func hostAndPort(address string) (host string, port uint64, err error) {
	colonIndex := strings.LastIndex(address, ":")
	if colonIndex < 0 {
		err = fmt.Errorf("server should have hostname:port format: %v", address)
		return
	}
	portText := address[colonIndex+1:]
	if dotIndex := strings.LastIndex(address, "."); dotIndex > colonIndex {
		// "port.grpcPort" form: keep only the part before the dot.
		portText = address[colonIndex+1 : dotIndex]
	}
	if port, err = strconv.ParseUint(portText, 10, 64); err != nil {
		return "", 0, fmt.Errorf("server port parse error: %w", err)
	}
	return address[:colonIndex], port, nil
}
// ServerToGrpcAddress converts a server address to its gRPC address.
// "host:port.grpcPort" maps to "host:grpcPort"; plain "host:port" maps to
// "host:(port+10000)". The process exits if the address cannot be parsed.
func ServerToGrpcAddress(server string) (serverGrpcAddress string) {
	if colonIndex := strings.LastIndex(server, ":"); colonIndex >= 0 {
		if dotIndex := strings.LastIndex(server, "."); dotIndex > colonIndex {
			// "port.grpcPort" form: use the explicit gRPC port directly.
			host := server[:colonIndex]
			grpcPort := server[dotIndex+1:]
			if _, err := strconv.ParseUint(grpcPort, 10, 64); err == nil {
				return util.JoinHostPort(host, int(util.ParseInt(grpcPort, 0)))
			}
		}
	}
	host, port, parseErr := hostAndPort(server)
	if parseErr != nil {
		glog.Fatalf("server address %s parse error: %v", server, parseErr)
	}
	// Default convention: the gRPC port is the HTTP port plus 10000.
	return util.JoinHostPort(host, int(port)+10000)
}
// GrpcAddressToServerAddress converts a gRPC address back to the plain
// server address by undoing the +10000 port offset. The process exits if
// the address cannot be parsed.
func GrpcAddressToServerAddress(grpcAddress string) (serverAddress string) {
	host, grpcPort, parseErr := hostAndPort(grpcAddress)
	if parseErr != nil {
		glog.Fatalf("server grpc address %s parse error: %v", grpcAddress, parseErr)
	}
	return util.JoinHostPort(host, int(grpcPort)-10000)
}
// WithMasterClient runs fn with a master gRPC client connected to master.
func WithMasterClient(streamingMode bool, master ServerAddress, grpcDialOption grpc.DialOption, waitForReady bool, fn func(client master_pb.SeaweedClient) error) error {
	return WithGrpcClient(streamingMode, 0, func(conn *grpc.ClientConn) error {
		return fn(master_pb.NewSeaweedClient(conn))
	}, master.ToGrpcAddress(), waitForReady, grpcDialOption)
}
// WithVolumeServerClient runs fn with a volume-server gRPC client connected
// to volumeServer (never waiting for the connection to become ready).
func WithVolumeServerClient(streamingMode bool, volumeServer ServerAddress, grpcDialOption grpc.DialOption, fn func(client volume_server_pb.VolumeServerClient) error) error {
	return WithGrpcClient(streamingMode, 0, func(conn *grpc.ClientConn) error {
		return fn(volume_server_pb.NewVolumeServerClient(conn))
	}, volumeServer.ToGrpcAddress(), false, grpcDialOption)
}
// WithOneOfGrpcMasterClients tries each master gRPC address in turn and
// returns nil on the first success; if every attempt fails, the last error
// is returned.
func WithOneOfGrpcMasterClients(streamingMode bool, masterGrpcAddresses map[string]ServerAddress, grpcDialOption grpc.DialOption, fn func(client master_pb.SeaweedClient) error) (err error) {
	for _, masterGrpcAddress := range masterGrpcAddresses {
		err = WithGrpcClient(streamingMode, 0, func(conn *grpc.ClientConn) error {
			return fn(master_pb.NewSeaweedClient(conn))
		}, masterGrpcAddress.ToGrpcAddress(), false, grpcDialOption)
		if err == nil {
			break
		}
	}
	return
}
// WithBrokerGrpcClient runs fn with a messaging-broker gRPC client connected
// to brokerGrpcAddress (never waiting for the connection to become ready).
func WithBrokerGrpcClient(streamingMode bool, brokerGrpcAddress string, grpcDialOption grpc.DialOption, fn func(client mq_pb.SeaweedMessagingClient) error) error {
	return WithGrpcClient(streamingMode, 0, func(conn *grpc.ClientConn) error {
		return fn(mq_pb.NewSeaweedMessagingClient(conn))
	}, brokerGrpcAddress, false, grpcDialOption)
}
// WithFilerClient runs fn with a filer gRPC client; it simply delegates to
// WithGrpcFilerClient.
func WithFilerClient(streamingMode bool, signature int32, filer ServerAddress, grpcDialOption grpc.DialOption, fn func(client filer_pb.SeaweedFilerClient) error) error {
	return WithGrpcFilerClient(streamingMode, signature, filer, grpcDialOption, fn)
}
// WithGrpcFilerClient runs fn with a filer gRPC client connected to
// filerAddress, passing the client signature through to WithGrpcClient.
func WithGrpcFilerClient(streamingMode bool, signature int32, filerAddress ServerAddress, grpcDialOption grpc.DialOption, fn func(client filer_pb.SeaweedFilerClient) error) error {
	return WithGrpcClient(streamingMode, signature, func(conn *grpc.ClientConn) error {
		return fn(filer_pb.NewSeaweedFilerClient(conn))
	}, filerAddress.ToGrpcAddress(), false, grpcDialOption)
}
// WithOneOfGrpcFilerClients tries each filer address in turn and returns nil
// on the first success; if every attempt fails, the last error is returned.
func WithOneOfGrpcFilerClients(streamingMode bool, filerAddresses []ServerAddress, grpcDialOption grpc.DialOption, fn func(client filer_pb.SeaweedFilerClient) error) (err error) {
	for _, filerAddress := range filerAddresses {
		err = WithGrpcClient(streamingMode, 0, func(conn *grpc.ClientConn) error {
			return fn(filer_pb.NewSeaweedFilerClient(conn))
		}, filerAddress.ToGrpcAddress(), false, grpcDialOption)
		if err == nil {
			break
		}
	}
	return
}