--- apiVersion: v1 items: - apiVersion: v1 kind: Pod metadata: annotations: istio.io/rev: openshift-gateway k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.34/23"],"mac_address":"0a:58:0a:85:00:22","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.34/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.34" ], "mac": "0a:58:0a:85:00:22", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 prometheus.io/path: /stats/prometheus prometheus.io/port: "15020" prometheus.io/scrape: "true" seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-04-16T14:11:47Z" generateName: router-gateway-1-openshift-default-6c59fbf55c- generation: 1 labels: gateway.istio.io/managed: istio.io-gateway-controller gateway.networking.k8s.io/gateway-name: router-gateway-1 pod-template-hash: 6c59fbf55c service.istio.io/canonical-name: router-gateway-1-openshift-default service.istio.io/canonical-revision: latest sidecar.istio.io/inject: "false" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-140-244 operation: Update subresource: status time: "2026-04-16T14:11:47Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:istio.io/rev: {} f:prometheus.io/path: {} f:prometheus.io/port: {} f:prometheus.io/scrape: {} f:generateName: {} f:labels: .: {} f:gateway.istio.io/managed: {} f:gateway.networking.k8s.io/gateway-name: {} f:pod-template-hash: {} f:service.istio.io/canonical-name: {} f:service.istio.io/canonical-revision: {} f:sidecar.istio.io/inject: {} f:ownerReferences: .: {} k:{"uid":"b707a79f-c328-4b5d-8763-ad07219e29b9"}: {} f:spec: f:containers: k:{"name":"istio-proxy"}: .: {} f:args: {} f:env: .: {} k:{"name":"CA_ADDR"}: .: {} f:name: {} f:value: {} k:{"name":"GOMAXPROCS"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"GOMEMLIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"HOST_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"INSTANCE_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_CPU_LIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"ISTIO_META_APP_CONTAINERS"}: .: {} f:name: {} k:{"name":"ISTIO_META_CLUSTER_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_INTERCEPTION_MODE"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_MESH_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_NODE_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_META_OWNER"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_POD_PORTS"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_WORKLOAD_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"PILOT_CERT_PROVIDER"}: .: {} f:name: {} f:value: {} k:{"name":"POD_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"POD_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"PROXY_CONFIG"}: .: {} f:name: {} f:value: {} k:{"name":"SERVICE_ACCOUNT"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"TRUST_DOMAIN"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:ports: .: {} k:{"containerPort":15020,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15021,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:privileged: {} f:readOnlyRootFilesystem: {} f:runAsGroup: {} f:runAsNonRoot: {} f:runAsUser: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/istio/pod"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/etc/istio/proxy"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/lib/istio/data"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/credential-uds"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/istio"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/tokens"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-credentials"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-uds"}: .: {} f:mountPath: {} f:name: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: .: {} f:sysctls: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"credential-socket"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-data"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-envoy"}: .: {} f:emptyDir: .: {} f:medium: {} f:name: {} k:{"name":"istio-podinfo"}: .: {} f:downwardAPI: .: {} f:defaultMode: {} f:items: {} f:name: {} k:{"name":"istio-token"}: .: {} f:name: {} f:projected: .: {} f:defaultMode: {} f:sources: {} k:{"name":"istiod-ca-cert"}: .: {} f:configMap: .: {} f:defaultMode: {} f:name: {} f:name: {} k:{"name":"workload-certs"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"workload-socket"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-04-16T14:11:47Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-04-16T14:11:47Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.34"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-04-16T14:11:49Z" name: router-gateway-1-openshift-default-6c59fbf55c-p5f4j namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-gateway-1-openshift-default-6c59fbf55c uid: b707a79f-c328-4b5d-8763-ad07219e29b9 resourceVersion: "21537" uid: d8f3bbe7-bfa8-4d5e-8897-fc29dc36e324 spec: containers: - args: - proxy - router - --domain - $(POD_NAMESPACE).svc.cluster.local - --proxyLogLevel - warning - --proxyComponentLogLevel - misc:error - --log_output_level - default:info env: - name: PILOT_CERT_PROVIDER value: istiod - name: CA_ADDR value: istiod-openshift-gateway.openshift-ingress.svc:15012 - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP - name: SERVICE_ACCOUNT valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.serviceAccountName - name: HOST_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.hostIP - name: ISTIO_CPU_LIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: PROXY_CONFIG value: | {"discoveryAddress":"istiod-openshift-gateway.openshift-ingress.svc:15012","proxyHeaders":{"server":{"disabled":true},"envoyDebugHeaders":{"disabled":true},"metadataExchangeHeaders":{"mode":"IN_MESH"}}} - name: ISTIO_META_POD_PORTS value: '[]' - name: ISTIO_META_APP_CONTAINERS - name: GOMEMLIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.memory - name: GOMAXPROCS valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: ISTIO_META_CLUSTER_ID value: Kubernetes - name: ISTIO_META_NODE_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.nodeName - name: ISTIO_META_INTERCEPTION_MODE value: REDIRECT - name: ISTIO_META_WORKLOAD_NAME value: router-gateway-1-openshift-default - name: ISTIO_META_OWNER value: kubernetes://apis/apps/v1/namespaces/kserve-ci-e2e-test/deployments/router-gateway-1-openshift-default - name: ISTIO_META_MESH_ID value: cluster.local - name: TRUST_DOMAIN value: cluster.local image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:d518f3d1539f45e1253c5c9fa22062802804601d4998cd50344e476a3cc388fe imagePullPolicy: IfNotPresent name: istio-proxy ports: - containerPort: 15020 name: metrics protocol: TCP - containerPort: 15021 name: status-port protocol: TCP - containerPort: 15090 name: http-envoy-prom protocol: TCP readinessProbe: failureThreshold: 4 httpGet: path: /healthz/ready port: 15021 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL privileged: false readOnlyRootFilesystem: true runAsGroup: 1000699999 runAsNonRoot: true runAsUser: 1000699999 startupProbe: failureThreshold: 30 httpGet: path: /healthz/ready port: 15021 scheme: HTTP initialDelaySeconds: 1 periodSeconds: 1 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-7kgpj readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: router-gateway-1-openshift-default-dockercfg-9vzwr nodeName: ip-10-0-140-244.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000690000 seLinuxOptions: level: s0:c26,c20 seccompProfile: type: RuntimeDefault sysctls: - name: net.ipv4.ip_unprivileged_port_start value: "0" serviceAccount: router-gateway-1-openshift-default serviceAccountName: router-gateway-1-openshift-default terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: workload-socket - emptyDir: {} name: credential-socket - emptyDir: {} name: workload-certs - emptyDir: medium: Memory name: istio-envoy - emptyDir: {} name: istio-data - downwardAPI: defaultMode: 420 items: - fieldRef: apiVersion: v1 fieldPath: metadata.labels path: labels - fieldRef: apiVersion: v1 fieldPath: metadata.annotations path: annotations name: istio-podinfo - name: istio-token projected: defaultMode: 420 sources: - serviceAccountToken: audience: istio-ca expirationSeconds: 43200 path: istio-token - configMap: defaultMode: 420 name: istio-ca-root-cert name: istiod-ca-cert - name: kube-api-access-7kgpj projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-04-16T14:11:48Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-04-16T14:11:47Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-04-16T14:11:49Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-04-16T14:11:49Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-04-16T14:11:47Z" status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 100m memory: 128Mi containerID: cri-o://fd2be4a5a4d96e239edcb35eeb9c33a5368798073bf8fb3ab462871d788edf07 image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:d518f3d1539f45e1253c5c9fa22062802804601d4998cd50344e476a3cc388fe imageID: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:0a86de591c0c259464e80a5c01e0c85078263846253cd50ef5ac555bcf1e4fec lastState: {} name: istio-proxy ready: true resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi restartCount: 0 started: true state: running: startedAt: "2026-04-16T14:11:47Z" user: linux: gid: 1000699999 supplementalGroups: - 1000699999 - 1000690000 uid: 1000699999 volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-7kgpj readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.140.244 hostIPs: - ip: 10.0.140.244 phase: Running podIP: 10.133.0.34 podIPs: - ip: 10.133.0.34 qosClass: Burstable startTime: "2026-04-16T14:11:47Z" - apiVersion: v1 kind: Pod metadata: annotations: istio.io/rev: openshift-gateway k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.59/23"],"mac_address":"0a:58:0a:86:00:3b","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.59/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.59" ], "mac": "0a:58:0a:86:00:3b", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 prometheus.io/path: /stats/prometheus prometheus.io/port: "15020" prometheus.io/scrape: "true" seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-04-16T14:26:04Z" generateName: router-gateway-2-openshift-default-6866b85949- generation: 1 labels: gateway.istio.io/managed: istio.io-gateway-controller gateway.networking.k8s.io/gateway-name: router-gateway-2 pod-template-hash: 6866b85949 service.istio.io/canonical-name: router-gateway-2-openshift-default service.istio.io/canonical-revision: latest sidecar.istio.io/inject: "false" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-129-3 operation: Update subresource: status time: "2026-04-16T14:26:04Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:istio.io/rev: {} f:prometheus.io/path: {} f:prometheus.io/port: {} f:prometheus.io/scrape: {} f:generateName: {} f:labels: .: {} f:gateway.istio.io/managed: {} f:gateway.networking.k8s.io/gateway-name: {} f:pod-template-hash: {} f:service.istio.io/canonical-name: {} f:service.istio.io/canonical-revision: {} f:sidecar.istio.io/inject: {} f:ownerReferences: .: {} k:{"uid":"494c685b-e87a-474b-9390-ea48bba88486"}: {} f:spec: f:containers: k:{"name":"istio-proxy"}: .: {} f:args: {} f:env: .: {} k:{"name":"CA_ADDR"}: .: {} f:name: {} f:value: {} k:{"name":"GOMAXPROCS"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"GOMEMLIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"HOST_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"INSTANCE_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_CPU_LIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"ISTIO_META_APP_CONTAINERS"}: .: {} f:name: {} k:{"name":"ISTIO_META_CLUSTER_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_INTERCEPTION_MODE"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_MESH_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_NODE_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_META_OWNER"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_POD_PORTS"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_WORKLOAD_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"PILOT_CERT_PROVIDER"}: .: {} f:name: {} f:value: {} k:{"name":"POD_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"POD_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"PROXY_CONFIG"}: .: {} f:name: {} f:value: {} k:{"name":"SERVICE_ACCOUNT"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"TRUST_DOMAIN"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:ports: .: {} k:{"containerPort":15020,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15021,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:privileged: {} f:readOnlyRootFilesystem: {} f:runAsGroup: {} f:runAsNonRoot: {} f:runAsUser: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/istio/pod"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/etc/istio/proxy"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/lib/istio/data"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/credential-uds"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/istio"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/tokens"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-credentials"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-uds"}: .: {} f:mountPath: {} f:name: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: .: {} f:sysctls: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"credential-socket"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-data"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-envoy"}: .: {} f:emptyDir: .: {} f:medium: {} f:name: {} k:{"name":"istio-podinfo"}: .: {} f:downwardAPI: .: {} f:defaultMode: {} f:items: {} f:name: {} k:{"name":"istio-token"}: .: {} f:name: {} f:projected: .: {} f:defaultMode: {} f:sources: {} k:{"name":"istiod-ca-cert"}: .: {} f:configMap: .: {} f:defaultMode: {} f:name: {} f:name: {} k:{"name":"workload-certs"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"workload-socket"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-04-16T14:26:04Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-04-16T14:26:04Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.59"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-04-16T14:26:11Z" name: router-gateway-2-openshift-default-6866b85949-wbg65 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-gateway-2-openshift-default-6866b85949 uid: 494c685b-e87a-474b-9390-ea48bba88486 resourceVersion: "34588" uid: ce99772a-304e-4452-bff7-1497e94435cd spec: containers: - args: - proxy - router - --domain - $(POD_NAMESPACE).svc.cluster.local - --proxyLogLevel - warning - --proxyComponentLogLevel - misc:error - --log_output_level - default:info env: - name: PILOT_CERT_PROVIDER value: istiod - name: CA_ADDR value: istiod-openshift-gateway.openshift-ingress.svc:15012 - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP - name: SERVICE_ACCOUNT valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.serviceAccountName - name: HOST_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.hostIP - name: ISTIO_CPU_LIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: PROXY_CONFIG value: | {"discoveryAddress":"istiod-openshift-gateway.openshift-ingress.svc:15012","proxyHeaders":{"server":{"disabled":true},"envoyDebugHeaders":{"disabled":true},"metadataExchangeHeaders":{"mode":"IN_MESH"}}} - name: ISTIO_META_POD_PORTS value: '[]' - name: ISTIO_META_APP_CONTAINERS - name: GOMEMLIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.memory - name: GOMAXPROCS valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: ISTIO_META_CLUSTER_ID value: Kubernetes - name: ISTIO_META_NODE_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.nodeName - name: ISTIO_META_INTERCEPTION_MODE value: REDIRECT - name: ISTIO_META_WORKLOAD_NAME value: router-gateway-2-openshift-default - name: ISTIO_META_OWNER value: kubernetes://apis/apps/v1/namespaces/kserve-ci-e2e-test/deployments/router-gateway-2-openshift-default - name: ISTIO_META_MESH_ID value: cluster.local - name: TRUST_DOMAIN value: cluster.local image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:d518f3d1539f45e1253c5c9fa22062802804601d4998cd50344e476a3cc388fe imagePullPolicy: IfNotPresent name: istio-proxy ports: - containerPort: 15020 name: metrics protocol: TCP - containerPort: 15021 name: status-port protocol: TCP - containerPort: 15090 name: http-envoy-prom protocol: TCP readinessProbe: failureThreshold: 4 httpGet: path: /healthz/ready port: 15021 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL privileged: false readOnlyRootFilesystem: true runAsGroup: 1000699999 runAsNonRoot: true runAsUser: 1000699999 startupProbe: failureThreshold: 30 httpGet: path: /healthz/ready port: 15021 scheme: HTTP initialDelaySeconds: 1 periodSeconds: 1 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-2xq6g readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: router-gateway-2-openshift-default-dockercfg-7hltt nodeName: ip-10-0-129-3.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000690000 seLinuxOptions: level: s0:c26,c20 seccompProfile: type: RuntimeDefault sysctls: - name: net.ipv4.ip_unprivileged_port_start value: "0" serviceAccount: router-gateway-2-openshift-default serviceAccountName: router-gateway-2-openshift-default terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: workload-socket - emptyDir: {} name: credential-socket - emptyDir: {} name: workload-certs - emptyDir: medium: Memory name: istio-envoy - emptyDir: {} name: istio-data - downwardAPI: defaultMode: 420 items: - fieldRef: apiVersion: v1 fieldPath: metadata.labels path: labels - fieldRef: apiVersion: v1 fieldPath: metadata.annotations path: annotations name: istio-podinfo - name: istio-token projected: defaultMode: 420 sources: - serviceAccountToken: audience: istio-ca expirationSeconds: 43200 path: istio-token - configMap: defaultMode: 420 name: istio-ca-root-cert name: istiod-ca-cert - name: kube-api-access-2xq6g projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:08Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:04Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:11Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:11Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:04Z" status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 100m memory: 128Mi containerID: cri-o://e208ba0f15f125b13fa8a881c60f8d75e75fd0187cfd4e60a3f79b99679f2146 image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:d518f3d1539f45e1253c5c9fa22062802804601d4998cd50344e476a3cc388fe imageID: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:0a86de591c0c259464e80a5c01e0c85078263846253cd50ef5ac555bcf1e4fec lastState: {} name: istio-proxy ready: true resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi restartCount: 0 started: true state: running: startedAt: "2026-04-16T14:26:07Z" user: linux: gid: 1000699999 supplementalGroups: - 1000699999 - 1000690000 uid: 1000699999 volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-2xq6g readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.129.3 hostIPs: - ip: 10.0.129.3 phase: Running podIP: 10.134.0.59 podIPs: - ip: 10.134.0.59 qosClass: Burstable startTime: "2026-04-16T14:26:04Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.60/23"],"mac_address":"0a:58:0a:86:00:3c","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.60/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.60" ], "mac": "0a:58:0a:86:00:3c", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-04-16T14:26:27Z" deletionGracePeriodSeconds: 30 deletionTimestamp: "2026-04-16T14:29:50Z" generateName: router-with-refs-pd-test-kserve-559fccd7cb- generation: 2 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: router-with-refs-pd-test app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: decode pod-template-hash: 559fccd7cb managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-129-3 operation: Update subresource: status time: "2026-04-16T14:26:27Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"c76706cd-8291-4c68-b17f-a98cdbb00181"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8001,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"llm-d-routing-sidecar"}: .: {} f:command: {} f:env: .: {} k:{"name":"INFERENCE_POOL_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"INFERENCE_POOL_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: {} f:restartPolicy: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} manager: kube-controller-manager operation: Update time: "2026-04-16T14:26:27Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-04-16T14:26:27Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.60"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-04-16T14:29:07Z" name: router-with-refs-pd-test-kserve-559fccd7cb-kwfkq namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-pd-test-kserve-559fccd7cb uid: c76706cd-8291-4c68-b17f-a98cdbb00181 resourceVersion: "36855" uid: 9e845b36-1515-4741-8a77-993cd22eb17b spec: containers: - command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi eval "vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" \ --port 8001 \ --disable-uvicorn-access-log \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.17.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 8 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 180 periodSeconds: 30 successThreshold: 1 timeoutSeconds: 30 name: main ports: - containerPort: 8001 protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: false runAsNonRoot: true runAsUser: 1000690000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8001 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwjtj readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-lsh9l - name: router-with-refs-pd-test-kserve-dockercfg-gz97h initContainers: - command: - /app/pd-sidecar - --port=8000 - --vllm-port=8001 - --connector=nixlv2 - --enable-ssrf-protection=true - --pool-group=inference.networking.x-k8s.io - --secure-proxy=true - --cert-path=/var/run/kserve/tls - --decoder-use-tls=true - --prefiller-use-tls=true env: - name: INFERENCE_POOL_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs - name: INFERENCE_POOL_NAME value: router-with-refs-pd-test-inference-pool image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: llm-d-routing-sidecar ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 10 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: {} restartPolicy: Always securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: false runAsNonRoot: false runAsUser: 1000690000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwjtj readOnly: true - args: - hf://facebook/opt-125m - /mnt/models env: - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589 imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000690000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwjtj readOnly: true nodeName: ip-10-0-129-3.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000690000 seLinuxOptions: level: s0:c26,c20 seccompProfile: type: RuntimeDefault serviceAccount: router-with-refs-pd-test-kserve serviceAccountName: router-with-refs-pd-test-kserve terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-pd-test-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-xwjtj projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:28Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:33Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-04-16T14:29:07Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-04-16T14:29:07Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:27Z" status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://21760630dd46ed09ed07d7d1f26a946c9c40ec7c87726c516a548115251cb2a7 image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.17.1 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:d19978a2d4bb2289c740a6c89d4cc15fbcf4d20d916f1e268168b8bbad3b776b lastState: {} name: main ready: true resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-04-16T14:26:33Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /home name: home - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwjtj readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.129.3 hostIPs: - ip: 10.0.129.3 initContainerStatuses: - containerID: cri-o://c0ca5e3249cea969ec1829d8d646e3020ce684adcca1be6aa374138041f9a452 image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1 imageID: ghcr.io/llm-d/llm-d-routing-sidecar@sha256:14ff2530c83bd6f95fa5b25309b150623b403da83f9152f635858f02163e2f95 lastState: {} name: llm-d-routing-sidecar ready: true resources: {} restartCount: 0 started: true state: running: startedAt: "2026-04-16T14:26:27Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwjtj readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://36e9be3ab6226725f4aa608c7c9217437015547a8335db28208350681746da80 image: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589 imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://36e9be3ab6226725f4aa608c7c9217437015547a8335db28208350681746da80 exitCode: 0 finishedAt: "2026-04-16T14:26:32Z" reason: Completed startedAt: "2026-04-16T14:26:28Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwjtj readOnly: true recursiveReadOnly: Disabled phase: Running podIP: 10.134.0.60 podIPs: - ip: 10.134.0.60 qosClass: Burstable startTime: "2026-04-16T14:26:27Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.61/23"],"mac_address":"0a:58:0a:86:00:3d","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.61/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.61" ], "mac": "0a:58:0a:86:00:3d", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-04-16T14:26:27Z" deletionGracePeriodSeconds: 30 deletionTimestamp: "2026-04-16T14:29:50Z" generateName: router-with-refs-pd-test-kserve-prefill-7ff6579bf7- generation: 2 labels: app.kubernetes.io/component: llminferenceservice-workload-prefill app.kubernetes.io/name: router-with-refs-pd-test app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: prefill pod-template-hash: 7ff6579bf7 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-129-3 operation: Update subresource: status time: "2026-04-16T14:26:27Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"e1709892-ea58-49ac-9cad-fe9dd0b8906d"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} manager: kube-controller-manager operation: Update time: "2026-04-16T14:26:27Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-04-16T14:26:27Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.61"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-04-16T14:29:07Z" name: router-with-refs-pd-test-kserve-prefill-7ff6579bf7-msls9 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-pd-test-kserve-prefill-7ff6579bf7 uid: e1709892-ea58-49ac-9cad-fe9dd0b8906d resourceVersion: "36853" uid: 035ca082-6c53-4429-ba8f-c54e6cbfbc94 spec: containers: - command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi eval "vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" \ --port 8000 \ --disable-uvicorn-access-log \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.17.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 8 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 180 periodSeconds: 30 successThreshold: 1 timeoutSeconds: 30 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: false runAsNonRoot: true runAsUser: 1000690000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /home name: home - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5lqfs readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-lsh9l initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589 imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000690000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5lqfs readOnly: true nodeName: ip-10-0-129-3.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000690000 seLinuxOptions: level: s0:c26,c20 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-pd-test-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-5lqfs projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:28Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:32Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-04-16T14:29:07Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-04-16T14:29:07Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-04-16T14:26:27Z" status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://8ecd65ab6626f2fc261884b8190ff150b2f4702e5e6151ee57696d7ea883dac9 image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.17.1 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:d19978a2d4bb2289c740a6c89d4cc15fbcf4d20d916f1e268168b8bbad3b776b lastState: {} name: main ready: true resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-04-16T14:26:32Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /home name: home - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5lqfs readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.129.3 hostIPs: - ip: 10.0.129.3 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://26d9a4067d610c3019af3ca84e302a3621f8e60c3852a2fb58d4a767cfa8b008 image: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589 imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://26d9a4067d610c3019af3ca84e302a3621f8e60c3852a2fb58d4a767cfa8b008 exitCode: 0 finishedAt: "2026-04-16T14:26:32Z" reason: Completed startedAt: "2026-04-16T14:26:27Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5lqfs readOnly: true recursiveReadOnly: Disabled phase: Running podIP: 10.134.0.61 podIPs: - ip: 10.134.0.61 qosClass: Burstable startTime: "2026-04-16T14:26:27Z" kind: PodList metadata: resourceVersion: "37247"