Warning: Failed to get DSC: the server could not find the requested resource Initial Kueue managementState: === RUN TestDefaultClusterTrainingRuntimes test_tag.go:37: Test tier 'Sanity' doesn't match expected tier 'Smoke' --- SKIP: TestDefaultClusterTrainingRuntimes (0.00s) === RUN TestDefaultTrainingHubRuntimesMatchDefaultClusterRuntimes test_tag.go:37: Test tier 'Sanity' doesn't match expected tier 'Smoke' --- SKIP: TestDefaultTrainingHubRuntimesMatchDefaultClusterRuntimes (0.00s) === RUN TestRunTrainJobWithDefaultClusterTrainingRuntimes cluster_training_runtimes_test.go:161: Running TrainJob with ClusterTrainingRuntime: torch-distributed cluster_training_runtimes_test.go:167: Created TrainJob test-ns-rpqzd/test-trainjob-5hfnn successfully cluster_training_runtimes_test.go:178: TrainJob with ClusterTrainingRuntime 'torch-distributed' completed successfully cluster_training_runtimes_test.go:161: Running TrainJob with ClusterTrainingRuntime: torch-distributed-rocm cluster_training_runtimes_test.go:167: Created TrainJob test-ns-bt7gs/test-trainjob-c4269 successfully cluster_training_runtimes_test.go:178: TrainJob with ClusterTrainingRuntime 'torch-distributed-rocm' completed successfully cluster_training_runtimes_test.go:161: Running TrainJob with ClusterTrainingRuntime: torch-distributed-cpu cluster_training_runtimes_test.go:167: Created TrainJob test-ns-qt5wp/test-trainjob-frn5v successfully cluster_training_runtimes_test.go:178: TrainJob with ClusterTrainingRuntime 'torch-distributed-cpu' completed successfully cluster_training_runtimes_test.go:161: Running TrainJob with ClusterTrainingRuntime: torch-distributed-cuda128-torch29-py312 cluster_training_runtimes_test.go:167: Created TrainJob test-ns-x6xd5/test-trainjob-294gj successfully cluster_training_runtimes_test.go:178: TrainJob with ClusterTrainingRuntime 'torch-distributed-cuda128-torch29-py312' completed successfully cluster_training_runtimes_test.go:161: Running TrainJob with ClusterTrainingRuntime: torch-distributed-rocm64-torch29-py312 cluster_training_runtimes_test.go:167: Created TrainJob test-ns-h4wbt/test-trainjob-n877d successfully panic: test timed out after 30m0s running tests: TestRunTrainJobWithDefaultClusterTrainingRuntimes (30m0s) goroutine 1930 [running]: testing.(*M).startAlarm.func1() /usr/lib/golang/src/testing/testing.go:2682 +0x345 created by time.goFunc /usr/lib/golang/src/time/sleep.go:215 +0x2d goroutine 1 [chan receive, 30 minutes]: testing.(*T).Run(0xc000183180, {0x24145ba?, 0xc000073aa8?}, 0x25ae198) /usr/lib/golang/src/testing/testing.go:2005 +0x485 testing.runTests.func1(0xc000183180) /usr/lib/golang/src/testing/testing.go:2477 +0x37 testing.tRunner(0xc000183180, 0xc000073be8) /usr/lib/golang/src/testing/testing.go:1934 +0xea testing.runTests(0xc0001b2828, {0x3b0b280, 0x3a, 0x3a}, {0x0?, 0x20?, 0x3c44a20?}) /usr/lib/golang/src/testing/testing.go:2475 +0x4b4 testing.(*M).Run(0xc0002a5680) /usr/lib/golang/src/testing/testing.go:2337 +0x63a github.com/opendatahub-io/distributed-workloads/tests/trainer.TestMain(0xc0002a5680) /workspace/source/dw_src/tests/trainer/trainer_kueue_integration_test.go:43 +0xbf main.main() _testmain.go:161 +0xa5 goroutine 91 [select]: github.com/onsi/gomega/internal.(*AsyncAssertion).match(0xc00028b110, {0x279caa8, 0xc000193f20}, 0x1, {0x0, 0x0, 0x0}) /opt/app-root/src/go/pkg/mod/github.com/onsi/gomega@v1.38.2/internal/async_assertion.go:558 +0x734 github.com/onsi/gomega/internal.(*AsyncAssertion).Should(0xc00028b110, {0x279caa8, 0xc000193f20}, {0x0, 0x0, 0x0}) /opt/app-root/src/go/pkg/mod/github.com/onsi/gomega@v1.38.2/internal/async_assertion.go:145 +0x85 github.com/opendatahub-io/distributed-workloads/tests/trainer.TestRunTrainJobWithDefaultClusterTrainingRuntimes(0xc0001836c0) /workspace/source/dw_src/tests/trainer/cluster_training_runtimes_test.go:171 +0x495 testing.tRunner(0xc0001836c0, 0x25ae198) /usr/lib/golang/src/testing/testing.go:1934 +0xea created by testing.(*T).Run in goroutine 1 /usr/lib/golang/src/testing/testing.go:1997 +0x465 goroutine 87 [IO wait]: internal/poll.runtime_pollWait(0x7f46cccc9600, 0x72) /usr/lib/golang/src/runtime/netpoll.go:351 +0x85 internal/poll.(*pollDesc).wait(0xc00017c880?, 0xc000254a80?, 0x0) /usr/lib/golang/src/internal/poll/fd_poll_runtime.go:84 +0x27 internal/poll.(*pollDesc).waitRead(...) /usr/lib/golang/src/internal/poll/fd_poll_runtime.go:89 internal/poll.(*FD).Read(0xc00017c880, {0xc000254a80, 0xa80, 0xa80}) /usr/lib/golang/src/internal/poll/fd_unix.go:165 +0x279 net.(*netFD).Read(0xc00017c880, {0xc000254a80?, 0xc000254adb?, 0x5?}) /usr/lib/golang/src/net/fd_posix.go:68 +0x25 net.(*conn).Read(0xc000590268, {0xc000254a80?, 0x7f46cc163280?, 0x7f47138b3a78?}) /usr/lib/golang/src/net/net.go:196 +0x45 crypto/tls.(*atLeastReader).Read(0xc0006866c0, {0xc000254a80?, 0xa20?, 0x8?}) /usr/lib/golang/src/crypto/tls/conn.go:819 +0x3b bytes.(*Buffer).ReadFrom(0xc00013cd28, {0x278c440, 0xc0006866c0}) /usr/lib/golang/src/bytes/buffer.go:217 +0x98 crypto/tls.(*Conn).readFromUntil(0xc00013ca88, {0x278f9c0, 0xc000590268}, 0xc0004b19d0?) /usr/lib/golang/src/crypto/tls/conn.go:841 +0xde crypto/tls.(*Conn).readRecordOrCCS(0xc00013ca88, 0x0) /usr/lib/golang/src/crypto/tls/conn.go:630 +0x3db crypto/tls.(*Conn).readRecord(...) /usr/lib/golang/src/crypto/tls/conn.go:592 crypto/tls.(*Conn).Read(0xc00013ca88, {0xc0005e9000, 0x1000, 0x9de680?}) /usr/lib/golang/src/crypto/tls/conn.go:1397 +0x145 bufio.(*Reader).Read(0xc0005e6360, {0xc0005e2204, 0x9, 0x9ec04e?}) /usr/lib/golang/src/bufio/bufio.go:245 +0x197 io.ReadAtLeast({0x278b8c0, 0xc0005e6360}, {0xc0005e2204, 0x9, 0x9}, 0x9) /usr/lib/golang/src/io/io.go:335 +0x8e io.ReadFull(...) /usr/lib/golang/src/io/io.go:354 golang.org/x/net/http2.readFrameHeader({0xc0005e2204, 0x9, 0xc000000e2b?}, {0x278b8c0?, 0xc0005e6360?}) /opt/app-root/src/go/pkg/mod/golang.org/x/net@v0.48.0/http2/frame.go:242 +0x65 golang.org/x/net/http2.(*Framer).ReadFrameHeader(0xc0005e21c0) /opt/app-root/src/go/pkg/mod/golang.org/x/net@v0.48.0/http2/frame.go:505 +0x6b golang.org/x/net/http2.(*Framer).ReadFrame(0xc0005e21c0) /opt/app-root/src/go/pkg/mod/golang.org/x/net@v0.48.0/http2/frame.go:564 +0x18 golang.org/x/net/http2.(*clientConnReadLoop).run(0xc0004b1fa8) /opt/app-root/src/go/pkg/mod/golang.org/x/net@v0.48.0/http2/transport.go:2258 +0xca golang.org/x/net/http2.(*ClientConn).readLoop(0xc000485a40) /opt/app-root/src/go/pkg/mod/golang.org/x/net@v0.48.0/http2/transport.go:2127 +0x52 created by golang.org/x/net/http2.(*Transport).newClientConn in goroutine 86 /opt/app-root/src/go/pkg/mod/golang.org/x/net@v0.48.0/http2/transport.go:880 +0xda5 FAIL github.com/opendatahub-io/distributed-workloads/tests/trainer 1800.055s FAIL