fix: resolve gRPC DNS resolution issues in Kubernetes #8384 (#8387)

* fix: resolve gRPC DNS resolution issues in Kubernetes #8384

- Replace direct `grpc.NewClient` calls with `pb.GrpcDial` for consistent connection establishment
- Fix asynchronous DNS resolution behavior in Kubernetes clusters configured with `ndots:5`
- Ensure high-level components use the established `pb.GrpcDial` helper for reliable networking

* refactor: refine gRPC DNS fix and add documentation

- Use the instance's `grpcDialOption` in `BrokerClient.ConfigureTopic` instead of hard-coded insecure transport credentials
- Add detailed comments to GrpcDial explaining Kubernetes DNS resolution rationale

* fix: ensure proper context propagation in broker_client gRPC calls

- Pass the provided `ctx` to `pb.GrpcDial` in `ConfigureTopic` and `GetUnflushedMessages`
- Ensure that timeouts and cancellations are correctly honored during connection establishment

* docs: refine gRPC resolver documentation and cleanup dead code

- Enhanced documentation for `GrpcDial` with explicit warnings about global state mutation when using `resolver.SetDefaultScheme("passthrough")`.
- Recommended `passthrough:///` prefix as the primary migration path for `grpc.NewClient`.
- Removed dead commented-out code for `grpc.WithBlock()` and `grpc.WithTimeout()`.
This commit is contained in:
Chris Lu
2026-02-19 15:46:02 -08:00
committed by GitHub
parent e9c45144cf
commit a2005cb2a6
3 changed files with 18 additions and 10 deletions

View File

@@ -58,7 +58,7 @@ func (c *BrokerClient) discoverFiler() error {
return nil // already discovered
}
conn, err := grpc.NewClient(c.masterAddress, c.grpcDialOption)
conn, err := pb.GrpcDial(context.Background(), c.masterAddress, false, c.grpcDialOption)
if err != nil {
return fmt.Errorf("failed to connect to master at %s: %v", c.masterAddress, err)
}
@@ -99,14 +99,13 @@ func (c *BrokerClient) findBrokerBalancer() error {
return fmt.Errorf("failed to discover filer: %v", err)
}
conn, err := grpc.NewClient(c.filerAddress, c.grpcDialOption)
conn, err := pb.GrpcDial(context.Background(), c.filerAddress, false, c.grpcDialOption)
if err != nil {
return fmt.Errorf("failed to connect to filer at %s: %v", c.filerAddress, err)
}
defer conn.Close()
client := filer_pb.NewSeaweedFilerClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
@@ -143,7 +142,7 @@ type filerClientImpl struct {
// WithFilerClient executes a function with a connected filer client
func (f *filerClientImpl) WithFilerClient(followRedirect bool, fn func(client filer_pb.SeaweedFilerClient) error) error {
conn, err := grpc.NewClient(f.filerAddress, f.grpcDialOption)
conn, err := pb.GrpcDial(context.Background(), f.filerAddress, false, f.grpcDialOption)
if err != nil {
return fmt.Errorf("failed to connect to filer at %s: %v", f.filerAddress, err)
}
@@ -317,7 +316,7 @@ func (c *BrokerClient) ConfigureTopic(ctx context.Context, namespace, topicName
return err
}
conn, err := grpc.NewClient(c.brokerAddress, grpc.WithTransportCredentials(insecure.NewCredentials()))
conn, err := pb.GrpcDial(ctx, c.brokerAddress, false, c.grpcDialOption)
if err != nil {
return fmt.Errorf("failed to connect to broker at %s: %v", c.brokerAddress, err)
}
@@ -429,7 +428,7 @@ func (c *BrokerClient) GetUnflushedMessages(ctx context.Context, namespace, topi
glog.V(2).Infof("Found broker at address: %s", c.brokerAddress)
// Step 2: Connect to broker
conn, err := grpc.NewClient(c.brokerAddress, c.grpcDialOption)
conn, err := pb.GrpcDial(ctx, c.brokerAddress, false, c.grpcDialOption)
if err != nil {
glog.V(2).Infof("Failed to connect to broker %s: %v", c.brokerAddress, err)
// Return empty slice if connection fails - prevents double-counting