* fix: resolve gRPC DNS resolution issues in Kubernetes #8384
  - Replace direct `grpc.NewClient` calls with `pb.GrpcDial` for consistent connection establishment
  - Fix async DNS resolution behavior in K8s with `ndots:5`
  - Ensure high-level components use established helper for reliable networking
* refactor: refine gRPC DNS fix and add documentation
  - Use instance's grpcDialOption in BrokerClient.ConfigureTopic
  - Add detailed comments to GrpcDial explaining Kubernetes DNS resolution rationale
* fix: ensure proper context propagation in broker_client gRPC calls
  - Pass the provided `ctx` to `pb.GrpcDial` in `ConfigureTopic` and `GetUnflushedMessages`
  - Ensures that timeouts and cancellations are correctly honored during connection establishment
* docs: refine gRPC resolver documentation and cleanup dead code
  - Enhanced documentation for `GrpcDial` with explicit warnings about global state mutation when using `resolver.SetDefaultScheme("passthrough")`.
  - Recommended `passthrough:///` prefix as the primary migration path for `grpc.NewClient`.
  - Removed dead commented-out code for `grpc.WithBlock()` and `grpc.WithTimeout()`.
This commit is contained in:
@@ -51,7 +51,7 @@ func (fds *FilerDiscoveryService) discoverFilersFromMaster(masterAddr pb.ServerA
|
|||||||
// Convert HTTP master address to gRPC address (HTTP port + 10000)
|
// Convert HTTP master address to gRPC address (HTTP port + 10000)
|
||||||
grpcAddr := masterAddr.ToGrpcAddress()
|
grpcAddr := masterAddr.ToGrpcAddress()
|
||||||
|
|
||||||
conn, err := grpc.NewClient(grpcAddr, fds.grpcDialOption)
|
conn, err := pb.GrpcDial(context.Background(), grpcAddr, false, fds.grpcDialOption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to connect to master at %s: %v", grpcAddr, err)
|
return nil, fmt.Errorf("failed to connect to master at %s: %v", grpcAddr, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -82,13 +82,22 @@ func NewGrpcServer(opts ...grpc.ServerOption) *grpc.Server {
|
|||||||
return grpc.NewServer(options...)
|
return grpc.NewServer(options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GrpcDial establishes a gRPC connection.
|
||||||
|
// IMPORTANT: This function intentionally uses the deprecated grpc.DialContext/grpc.Dial behavior
|
||||||
|
// to preserve the "passthrough" resolver semantics required for Kubernetes ndots/search-domain DNS behavior.
|
||||||
|
// This allows kube DNS suffixes to be correctly appended by the OS resolver.
|
||||||
|
//
|
||||||
|
// Switching to grpc.NewClient (which defaults to the "dns" resolver) would break this behavior
|
||||||
|
// in environments with ndots:5 and many-dot hostnames.
|
||||||
|
//
|
||||||
|
// Safe alternatives if switching to grpc.NewClient:
|
||||||
|
// 1. Prefix the target with "passthrough:///" (e.g., "passthrough:///my-service:8080"). This is the recommended primary migration path.
|
||||||
|
// 2. Call resolver.SetDefaultScheme("passthrough") exactly once during init().
|
||||||
|
// WARNING: resolver.SetDefaultScheme is NOT thread-safe and mutates global resolver state, affecting all grpc.NewClient calls in the process.
|
||||||
func GrpcDial(ctx context.Context, address string, waitForReady bool, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
|
func GrpcDial(ctx context.Context, address string, waitForReady bool, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
|
||||||
// opts = append(opts, grpc.WithBlock())
|
|
||||||
// opts = append(opts, grpc.WithTimeout(time.Duration(5*time.Second)))
|
|
||||||
var options []grpc.DialOption
|
var options []grpc.DialOption
|
||||||
|
|
||||||
options = append(options,
|
options = append(options,
|
||||||
// grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
||||||
grpc.WithDefaultCallOptions(
|
grpc.WithDefaultCallOptions(
|
||||||
grpc.MaxCallSendMsgSize(Max_Message_Size),
|
grpc.MaxCallSendMsgSize(Max_Message_Size),
|
||||||
grpc.MaxCallRecvMsgSize(Max_Message_Size),
|
grpc.MaxCallRecvMsgSize(Max_Message_Size),
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ func (c *BrokerClient) discoverFiler() error {
|
|||||||
return nil // already discovered
|
return nil // already discovered
|
||||||
}
|
}
|
||||||
|
|
||||||
conn, err := grpc.NewClient(c.masterAddress, c.grpcDialOption)
|
conn, err := pb.GrpcDial(context.Background(), c.masterAddress, false, c.grpcDialOption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to connect to master at %s: %v", c.masterAddress, err)
|
return fmt.Errorf("failed to connect to master at %s: %v", c.masterAddress, err)
|
||||||
}
|
}
|
||||||
@@ -99,14 +99,13 @@ func (c *BrokerClient) findBrokerBalancer() error {
|
|||||||
return fmt.Errorf("failed to discover filer: %v", err)
|
return fmt.Errorf("failed to discover filer: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
conn, err := grpc.NewClient(c.filerAddress, c.grpcDialOption)
|
conn, err := pb.GrpcDial(context.Background(), c.filerAddress, false, c.grpcDialOption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to connect to filer at %s: %v", c.filerAddress, err)
|
return fmt.Errorf("failed to connect to filer at %s: %v", c.filerAddress, err)
|
||||||
}
|
}
|
||||||
defer conn.Close()
|
defer conn.Close()
|
||||||
|
|
||||||
client := filer_pb.NewSeaweedFilerClient(conn)
|
client := filer_pb.NewSeaweedFilerClient(conn)
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
@@ -143,7 +142,7 @@ type filerClientImpl struct {
|
|||||||
|
|
||||||
// WithFilerClient executes a function with a connected filer client
|
// WithFilerClient executes a function with a connected filer client
|
||||||
func (f *filerClientImpl) WithFilerClient(followRedirect bool, fn func(client filer_pb.SeaweedFilerClient) error) error {
|
func (f *filerClientImpl) WithFilerClient(followRedirect bool, fn func(client filer_pb.SeaweedFilerClient) error) error {
|
||||||
conn, err := grpc.NewClient(f.filerAddress, f.grpcDialOption)
|
conn, err := pb.GrpcDial(context.Background(), f.filerAddress, false, f.grpcDialOption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to connect to filer at %s: %v", f.filerAddress, err)
|
return fmt.Errorf("failed to connect to filer at %s: %v", f.filerAddress, err)
|
||||||
}
|
}
|
||||||
@@ -317,7 +316,7 @@ func (c *BrokerClient) ConfigureTopic(ctx context.Context, namespace, topicName
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
conn, err := grpc.NewClient(c.brokerAddress, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := pb.GrpcDial(ctx, c.brokerAddress, false, c.grpcDialOption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to connect to broker at %s: %v", c.brokerAddress, err)
|
return fmt.Errorf("failed to connect to broker at %s: %v", c.brokerAddress, err)
|
||||||
}
|
}
|
||||||
@@ -429,7 +428,7 @@ func (c *BrokerClient) GetUnflushedMessages(ctx context.Context, namespace, topi
|
|||||||
glog.V(2).Infof("Found broker at address: %s", c.brokerAddress)
|
glog.V(2).Infof("Found broker at address: %s", c.brokerAddress)
|
||||||
|
|
||||||
// Step 2: Connect to broker
|
// Step 2: Connect to broker
|
||||||
conn, err := grpc.NewClient(c.brokerAddress, c.grpcDialOption)
|
conn, err := pb.GrpcDial(ctx, c.brokerAddress, false, c.grpcDialOption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(2).Infof("Failed to connect to broker %s: %v", c.brokerAddress, err)
|
glog.V(2).Infof("Failed to connect to broker %s: %v", c.brokerAddress, err)
|
||||||
// Return empty slice if connection fails - prevents double-counting
|
// Return empty slice if connection fails - prevents double-counting
|
||||||
|
|||||||
Reference in New Issue
Block a user