apiVersion: v1 items: - apiVersion: ray.io/v1 kind: RayCluster metadata: annotations: odh.ray.io/secure-trusted-network: "true" creationTimestamp: "2026-05-25T19:09:07Z" finalizers: - ray.io/authentication-resources generation: 1 labels: ray.io/originated-from-cr-name: long-running ray.io/originated-from-crd: RayJob name: long-running-z74d6 namespace: test-ns-5d74m ownerReferences: - apiVersion: ray.io/v1 blockOwnerDeletion: true controller: true kind: RayJob name: long-running uid: 679709d6-91c0-4312-99cf-bc3014bef418 resourceVersion: "45734" uid: f5a30e1b-b901-4889-86b2-6ac8017ebec7 spec: headGroupSpec: enableIngress: false rayStartParams: dashboard-host: 0.0.0.0 template: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-head ports: - containerPort: 6379 name: gcs-server protocol: TCP - containerPort: 8000 name: serve protocol: TCP - containerPort: 8265 name: dashboard protocol: TCP - containerPort: 10001 name: client protocol: TCP resources: limits: cpu: "2" memory: 10G requests: cpu: 500m memory: 6G volumeMounts: - mountPath: /home/ray/jobs name: jobs volumes: - configMap: name: jobs name: jobs rayVersion: 2.46.0 workerGroupSpecs: - groupName: small-group maxReplicas: 1 minReplicas: 1 numOfHosts: 1 rayStartParams: num-cpus: "1" replicas: 1 scaleStrategy: {} template: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-worker resources: limits: cpu: "1" memory: 3G requests: cpu: 500m memory: 1G status: conditions: - lastTransitionTime: "2026-05-25T19:09:07Z" message: 'containers with unready status: [ray-head kube-rbac-proxy]' reason: ContainersNotReady status: "False" type: HeadPodReady - lastTransitionTime: "2026-05-25T19:09:07Z" message: RayCluster Pods are being provisioned for first time reason: RayClusterPodsProvisioning status: "False" type: RayClusterProvisioned - lastTransitionTime: "2026-05-25T19:09:07Z" message: "" reason: RayClusterSuspended status: "False" type: RayClusterSuspended - lastTransitionTime: "2026-05-25T19:09:07Z" message: "" reason: RayClusterSuspending status: "False" type: RayClusterSuspending - lastTransitionTime: "2026-05-25T19:09:07Z" message: 'Authentication resources created successfully (mode: IntegratedOAuth)' observedGeneration: 1 reason: AuthenticationResourcesCreated status: "True" type: AuthenticationReady desiredCPU: "1" desiredGPU: "0" desiredMemory: 7G desiredTPU: "0" desiredWorkerReplicas: 1 endpoints: client: "10001" dashboard: "8265" gcs-server: "6379" metrics: "8080" serve: "8000" head: podIP: 10.132.0.46 podName: long-running-z74d6-head-z65th serviceIP: 10.132.0.46 serviceName: long-running-z74d6-head-svc lastUpdateTime: "2026-05-25T19:09:10Z" maxWorkerReplicas: 1 minWorkerReplicas: 1 observedGeneration: 1 - apiVersion: ray.io/v1 kind: RayCluster metadata: annotations: odh.ray.io/secure-trusted-network: "true" creationTimestamp: "2026-05-25T18:54:05Z" finalizers: - ray.io/authentication-resources generation: 1 name: raycluster namespace: test-ns-fmjcp resourceVersion: "37158" uid: 034fa204-3dac-4f33-8f43-6388b76ea05d spec: headGroupSpec: enableIngress: false rayStartParams: dashboard-host: 0.0.0.0 template: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-head ports: - containerPort: 6379 name: gcs-server protocol: TCP - containerPort: 8000 name: serve protocol: TCP - containerPort: 8265 name: dashboard protocol: TCP - containerPort: 10001 name: client protocol: TCP resources: limits: cpu: "2" memory: 10G requests: cpu: 500m memory: 6G volumeMounts: - mountPath: /home/ray/jobs name: jobs volumes: - configMap: name: jobs name: jobs rayVersion: 2.46.0 workerGroupSpecs: - groupName: small-group maxReplicas: 1 minReplicas: 1 numOfHosts: 1 rayStartParams: num-cpus: "1" replicas: 1 scaleStrategy: {} template: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-worker resources: limits: cpu: "1" memory: 3G requests: cpu: 500m memory: 1G status: availableWorkerReplicas: 1 conditions: - lastTransitionTime: "2026-05-25T18:54:28Z" message: "" reason: HeadPodRunningAndReady status: "True" type: HeadPodReady - lastTransitionTime: "2026-05-25T18:54:30Z" message: All Ray Pods are ready for the first time reason: AllPodRunningAndReadyFirstTime status: "True" type: RayClusterProvisioned - lastTransitionTime: "2026-05-25T18:54:05Z" message: "" reason: RayClusterSuspended status: "False" type: RayClusterSuspended - lastTransitionTime: "2026-05-25T18:54:05Z" message: "" reason: RayClusterSuspending status: "False" type: RayClusterSuspending - lastTransitionTime: "2026-05-25T18:54:05Z" message: 'Authentication resources created successfully (mode: IntegratedOAuth)' observedGeneration: 1 reason: AuthenticationResourcesCreated status: "True" type: AuthenticationReady desiredCPU: "1" desiredGPU: "0" desiredMemory: 7G desiredTPU: "0" desiredWorkerReplicas: 1 endpoints: client: "10001" dashboard: "8265" gcs-server: "6379" metrics: "8080" serve: "8000" head: podIP: 10.133.0.18 podName: raycluster-head-n54fl serviceIP: 10.133.0.18 serviceName: raycluster-head-svc lastUpdateTime: "2026-05-25T18:54:30Z" maxWorkerReplicas: 1 minWorkerReplicas: 1 observedGeneration: 1 readyWorkerReplicas: 1 state: ready stateTransitionTimes: ready: "2026-05-25T18:54:30Z" - apiVersion: ray.io/v1 kind: RayJob metadata: creationTimestamp: "2026-05-25T19:09:02Z" generation: 1 name: invalid-yamlstr namespace: test-ns-5d74m resourceVersion: "45489" uid: e9893abd-daf7-41e5-a26f-72561b909c97 spec: backoffLimit: 0 entrypoint: python /home/ray/jobs/counter.py rayClusterSpec: headGroupSpec: rayStartParams: dashboard-host: 0.0.0.0 template: spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-head ports: - containerPort: 6379 name: gcs-server protocol: TCP - containerPort: 8000 name: serve protocol: TCP - containerPort: 8265 name: dashboard protocol: TCP - containerPort: 10001 name: client protocol: TCP resources: limits: cpu: "2" memory: 10G requests: cpu: 500m memory: 6G volumeMounts: - mountPath: /home/ray/jobs name: jobs volumes: - configMap: name: jobs name: jobs rayVersion: 2.46.0 workerGroupSpecs: - groupName: small-group maxReplicas: 1 minReplicas: 1 numOfHosts: 1 rayStartParams: num-cpus: "1" replicas: 1 template: spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-worker resources: limits: cpu: "1" memory: 3G requests: cpu: 500m memory: 1G runtimeEnvYAML: invalid_yaml_string submissionMode: K8sJobMode ttlSecondsAfterFinished: 0 - apiVersion: ray.io/v1 kind: RayJob metadata: creationTimestamp: "2026-05-25T19:09:07Z" finalizers: - ray.io/rayjob-finalizer generation: 2 name: long-running namespace: test-ns-5d74m resourceVersion: "45800" uid: 679709d6-91c0-4312-99cf-bc3014bef418 spec: activeDeadlineSeconds: 5 backoffLimit: 0 entrypoint: python /home/ray/jobs/long_running.py rayClusterSpec: headGroupSpec: rayStartParams: dashboard-host: 0.0.0.0 template: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-head ports: - containerPort: 6379 name: gcs-server protocol: TCP - containerPort: 8000 name: serve protocol: TCP - containerPort: 8265 name: dashboard protocol: TCP - containerPort: 10001 name: client protocol: TCP resources: limits: cpu: "2" memory: 10G requests: cpu: 500m memory: 6G volumeMounts: - mountPath: /home/ray/jobs name: jobs volumes: - configMap: name: jobs name: jobs rayVersion: 2.46.0 workerGroupSpecs: - groupName: small-group maxReplicas: 1 minReplicas: 1 numOfHosts: 1 rayStartParams: num-cpus: "1" replicas: 1 scaleStrategy: {} template: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-worker resources: limits: cpu: "1" memory: 3G requests: cpu: 500m memory: 1G shutdownAfterJobFinishes: true submissionMode: K8sJobMode submitterPodTemplate: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-job-submitter resources: limits: cpu: 500m memory: 500Mi requests: cpu: 200m memory: 200Mi restartPolicy: Never ttlSecondsAfterFinished: 600 status: endTime: "2026-05-25T19:09:13Z" failed: 1 jobDeploymentStatus: Failed jobId: long-running-mwmg8 message: 'The RayJob has passed the activeDeadlineSeconds. StartTime: 2026-05-25 19:09:07 +0000 UTC. ActiveDeadlineSeconds: 5' rayClusterName: long-running-z74d6 rayClusterStatus: desiredCPU: "0" desiredGPU: "0" desiredMemory: "0" desiredTPU: "0" head: {} rayJobInfo: {} reason: DeadlineExceeded startTime: "2026-05-25T19:09:07Z" succeeded: 0 - apiVersion: ray.io/v1 kind: RayJob metadata: creationTimestamp: "2026-05-25T19:09:17Z" finalizers: - ray.io/rayjob-finalizer generation: 2 name: counter namespace: test-ns-fmjcp resourceVersion: "45975" uid: 1b6fe549-caa6-4b29-b154-0df2727b1fe8 spec: backoffLimit: 0 clusterSelector: ray.io/cluster: raycluster entrypoint: python /home/ray/jobs/counter.py runtimeEnvYAML: |2 env_vars: counter_name: test_counter submissionMode: K8sJobMode submitterPodTemplate: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-job-submitter resources: limits: cpu: 500m memory: 500Mi requests: cpu: 200m memory: 200Mi restartPolicy: Never ttlSecondsAfterFinished: 0 status: dashboardURL: raycluster-head-svc.test-ns-fmjcp.svc.cluster.local:8265 endTime: "2026-05-25T19:09:36Z" failed: 0 jobDeploymentStatus: Complete jobId: counter-jgbtk jobStatus: SUCCEEDED message: Job finished successfully. rayClusterName: raycluster rayClusterStatus: availableWorkerReplicas: 1 conditions: - lastTransitionTime: "2026-05-25T18:54:28Z" message: "" reason: HeadPodRunningAndReady status: "True" type: HeadPodReady - lastTransitionTime: "2026-05-25T18:54:30Z" message: All Ray Pods are ready for the first time reason: AllPodRunningAndReadyFirstTime status: "True" type: RayClusterProvisioned - lastTransitionTime: "2026-05-25T18:54:05Z" message: "" reason: RayClusterSuspended status: "False" type: RayClusterSuspended - lastTransitionTime: "2026-05-25T18:54:05Z" message: "" reason: RayClusterSuspending status: "False" type: RayClusterSuspending - lastTransitionTime: "2026-05-25T18:54:05Z" message: 'Authentication resources created successfully (mode: IntegratedOAuth)' observedGeneration: 1 reason: AuthenticationResourcesCreated status: "True" type: AuthenticationReady desiredCPU: "1" desiredGPU: "0" desiredMemory: 7G desiredTPU: "0" desiredWorkerReplicas: 1 endpoints: client: "10001" dashboard: "8265" gcs-server: "6379" metrics: "8080" serve: "8000" head: podIP: 10.133.0.18 podName: raycluster-head-n54fl serviceIP: 10.133.0.18 serviceName: raycluster-head-svc lastUpdateTime: "2026-05-25T18:54:30Z" maxWorkerReplicas: 1 minWorkerReplicas: 1 observedGeneration: 1 readyWorkerReplicas: 1 state: ready stateTransitionTimes: ready: "2026-05-25T18:54:30Z" rayJobInfo: endTime: "2026-05-25T19:09:28Z" startTime: "2026-05-25T19:09:24Z" startTime: "2026-05-25T19:09:17Z" succeeded: 1 - apiVersion: ray.io/v1 kind: RayJob metadata: creationTimestamp: "2026-05-25T19:09:30Z" finalizers: - ray.io/rayjob-finalizer generation: 2 name: fail namespace: test-ns-fmjcp resourceVersion: "45991" uid: 711b556c-3e11-4ed6-8e7b-944af8424b11 spec: backoffLimit: 0 clusterSelector: ray.io/cluster: raycluster entrypoint: python /home/ray/jobs/fail.py submissionMode: K8sJobMode submitterPodTemplate: metadata: {} spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-job-submitter resources: limits: cpu: 500m memory: 500Mi requests: cpu: 200m memory: 200Mi restartPolicy: Never ttlSecondsAfterFinished: 0 status: dashboardURL: raycluster-head-svc.test-ns-fmjcp.svc.cluster.local:8265 failed: 0 jobDeploymentStatus: Running jobId: fail-znh6f jobStatus: FAILED message: "Job entrypoint command failed with exit code 1, last available logs (truncated to 20,000 chars):\n2026-05-25 19:09:37,399\tINFO job_manager.py:587 -- Runtime env is setting up.\nRunning entrypoint for job fail-znh6f: python /home/ray/jobs/fail.py\nSomething is seriously wrong.\n" rayClusterName: raycluster rayClusterStatus: availableWorkerReplicas: 1 conditions: - lastTransitionTime: "2026-05-25T18:54:28Z" message: "" reason: HeadPodRunningAndReady status: "True" type: HeadPodReady - lastTransitionTime: "2026-05-25T18:54:30Z" message: All Ray Pods are ready for the first time reason: AllPodRunningAndReadyFirstTime status: "True" type: RayClusterProvisioned - lastTransitionTime: "2026-05-25T18:54:05Z" message: "" reason: RayClusterSuspended status: "False" type: RayClusterSuspended - lastTransitionTime: "2026-05-25T18:54:05Z" message: "" reason: RayClusterSuspending status: "False" type: RayClusterSuspending - lastTransitionTime: "2026-05-25T18:54:05Z" message: 'Authentication resources created successfully (mode: IntegratedOAuth)' observedGeneration: 1 reason: AuthenticationResourcesCreated status: "True" type: AuthenticationReady desiredCPU: "1" desiredGPU: "0" desiredMemory: 7G desiredTPU: "0" desiredWorkerReplicas: 1 endpoints: client: "10001" dashboard: "8265" gcs-server: "6379" metrics: "8080" serve: "8000" head: podIP: 10.133.0.18 podName: raycluster-head-n54fl serviceIP: 10.133.0.18 serviceName: raycluster-head-svc lastUpdateTime: "2026-05-25T18:54:30Z" maxWorkerReplicas: 1 minWorkerReplicas: 1 observedGeneration: 1 readyWorkerReplicas: 1 state: ready stateTransitionTimes: ready: "2026-05-25T18:54:30Z" rayJobInfo: endTime: "2026-05-25T19:09:38Z" startTime: "2026-05-25T19:09:37Z" startTime: "2026-05-25T19:09:30Z" succeeded: 0 - apiVersion: ray.io/v1 kind: RayJob metadata: creationTimestamp: "2026-05-25T19:09:14Z" generation: 1 name: managed-externally namespace: test-ns-fmjcp resourceVersion: "45810" uid: 4be02706-6d7e-4f17-a28b-d9b5173bbf2f spec: backoffLimit: 0 clusterSelector: ray.io/cluster: raycluster entrypoint: python /home/ray/jobs/counter.py managedBy: kueue.x-k8s.io/multikueue runtimeEnvYAML: |2 env_vars: counter_name: test_counter submissionMode: K8sJobMode submitterPodTemplate: spec: containers: - image: quay.io/modh/ray@sha256:42fbc5d898cb9c7d202ee89308ef328838d42985ec384f2476d8f3356acd01cb name: ray-job-submitter resources: limits: cpu: 500m memory: 500Mi requests: cpu: 200m memory: 200Mi restartPolicy: Never ttlSecondsAfterFinished: 0 kind: List metadata: resourceVersion: ""