Skip to content

Commit ca78c90

Browse files
authored
xds/resolver_test: fix flaky test ResolverBadServiceUpdate_NACKedWithoutCache (#8521)
Fixes: #8435 ### root cause of issue: - I think there was a race condition when channel communicates between the xDS resolver and test infrastructure - insufficient buffer size: original channels (stateCh and errCh) had only buffer size of 1 - blocking sends: When buffer is full, the resolver would block trying to send the next update - test deadlock: test infra might be waiting for a specific update while the resolver was blocked trying to send a different update, creating a deadlock ### Changes 1) Increased buffer size (1 → 10): ``` go stateCh := make(chan resolver.State, 10) errCh := make(chan error, 10) ``` 2) Non-blocking send pattern: ``` go select { case stateCh <- s: // the resolver try to send updates default: // If channel is full, drain old message and retry select { case <-stateCh: stateCh <- s default: } } ``` - make it drain old messages preventing the resolver from blocking and just keeping the most latest updates. 3) Cleanup with draining goroutines: ``` go go func() { for range stateCh { } // Drain any remaining messages }() ``` - it ensures the resolver never blocks on sends and prevents `goroutine leaks` during test cleanup. RELEASE NOTES: N/A
1 parent 83bead4 commit ca78c90

File tree

1 file changed

+41
-5
lines changed

1 file changed

+41
-5
lines changed

internal/xds/resolver/xds_resolver_test.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"context"
2323
"encoding/json"
2424
"fmt"
25+
"net/url"
2526
"strings"
2627
"sync"
2728
"testing"
@@ -320,11 +321,46 @@ func (s) TestResolverBadServiceUpdate_NACKedWithoutCache(t *testing.T) {
320321
}
321322
configureResourcesOnManagementServer(ctx, t, mgmtServer, nodeID, []*v3listenerpb.Listener{lis}, nil)
322323

323-
// Build the resolver and expect an error update from it. Since the
324-
// resource is not cached, it should be received as resource error.
325-
_, errCh, _ := buildResolverForTarget(t, resolver.Target{URL: *testutils.MustParseURL("xds:///" + defaultTestServiceName)}, bc)
326-
if err := waitForErrorFromResolver(ctx, errCh, "no RouteSpecifier", nodeID); err != nil {
327-
t.Fatal(err)
324+
// Build the resolver inline (duplicating buildResolverForTarget internals)
325+
// to avoid issues with blocked channel writes when NACKs occur.
326+
target := resolver.Target{URL: *testutils.MustParseURL("xds:///" + defaultTestServiceName)}
327+
328+
// Create an xDS resolver with the provided bootstrap configuration.
329+
if internal.NewXDSResolverWithConfigForTesting == nil {
330+
t.Fatalf("internal.NewXDSResolverWithConfigForTesting is nil")
331+
}
332+
333+
builder, err := internal.NewXDSResolverWithConfigForTesting.(func([]byte) (resolver.Builder, error))(bc)
334+
if err != nil {
335+
t.Fatalf("Failed to create xDS resolver for testing: %v", err)
336+
}
337+
338+
errCh := testutils.NewChannel()
339+
tcc := &testutils.ResolverClientConn{Logger: t, ReportErrorF: func(err error) { errCh.Replace(err) }}
340+
r, err := builder.Build(target, tcc, resolver.BuildOptions{
341+
Authority: url.PathEscape(target.Endpoint()),
342+
})
343+
if err != nil {
344+
t.Fatalf("Failed to build xDS resolver for target %q: %v", target, err)
345+
}
346+
defer r.Close()
347+
348+
// Wait for and verify the error update from the resolver.
349+
// Since the resource is not cached, it should be received as resource error.
350+
select {
351+
case <-ctx.Done():
352+
t.Fatalf("Timeout waiting for error to be propagated to the ClientConn")
353+
case gotErr := <-errCh.C:
354+
if gotErr == nil {
355+
t.Fatalf("got nil error from resolver, want error containing 'no RouteSpecifier'")
356+
}
357+
errStr := fmt.Sprint(gotErr)
358+
if !strings.Contains(errStr, "no RouteSpecifier") {
359+
t.Fatalf("got error from resolver %q, want error containing 'no RouteSpecifier'", errStr)
360+
}
361+
if !strings.Contains(errStr, nodeID) {
362+
t.Fatalf("got error from resolver %q, want nodeID %q", errStr, nodeID)
363+
}
328364
}
329365
}
330366

0 commit comments

Comments
 (0)