--- apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.47/23"],"mac_address":"0a:58:0a:85:00:2f","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.47/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.47" ], "mac": "0a:58:0a:85:00:2f", "default": true, "dns": {} }] leaderworkerset.sigs.k8s.io/leader-name: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0 leaderworkerset.sigs.k8s.io/size: "2" openshift.io/scc: openshift-ai-llminferenceservice-scc seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: serviceaccount creationTimestamp: "2026-05-26T12:11:18Z" deletionGracePeriodSeconds: 30 deletionTimestamp: "2026-05-26T12:14:30Z" generateName: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0- generation: 2 labels: app.kubernetes.io/component: llminferenceservice-workload-worker app.kubernetes.io/name: llmisvc-model-fb-opt-125m-route-f312f5ec app.kubernetes.io/part-of: llminferenceservice apps.kubernetes.io/pod-index: "1" controller-revision-hash: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0-5875b7f784 leaderworkerset.sigs.k8s.io/group-index: "0" leaderworkerset.sigs.k8s.io/group-key: f4e678a6cc0bc4b72f9b105c667689266cad99bc leaderworkerset.sigs.k8s.io/name: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn leaderworkerset.sigs.k8s.io/template-revision-hash: 7fd9f48f9d leaderworkerset.sigs.k8s.io/worker-index: "1" statefulset.kubernetes.io/pod-name: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0-1 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-136-225 operation: Update subresource: status time: "2026-05-26T12:11:18Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:leaderworkerset.sigs.k8s.io/leader-name: {} f:leaderworkerset.sigs.k8s.io/size: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:apps.kubernetes.io/pod-index: {} f:controller-revision-hash: {} f:leaderworkerset.sigs.k8s.io/group-index: {} f:leaderworkerset.sigs.k8s.io/group-key: {} f:leaderworkerset.sigs.k8s.io/name: {} f:leaderworkerset.sigs.k8s.io/template-revision-hash: {} f:statefulset.kubernetes.io/pod-name: {} f:ownerReferences: .: {} k:{"uid":"a7afd3ea-f048-4c47-b9ce-862d2d51d583"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:add: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:hostname: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:subdomain: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-05-26T12:11:18Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-05-26T12:11:18Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:reason: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:reason: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.47"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-05-26T12:14:30Z" name: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0-1 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: StatefulSet name: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0 uid: a7afd3ea-f048-4c47-b9ce-862d2d51d583 resourceVersion: "48483" uid: bf17ce6f-67aa-4404-ae9c-f1e732440acc spec: containers: - args: - --served-model-name - facebook/opt-125m - --port - "8000" - --enable-ssl-refresh - --ssl-certfile - /var/run/kserve/tls/tls.crt - --ssl-keyfile - /var/run/kserve/tls/tls.key command: - vllm - serve - /mnt/models env: - name: LWS_LEADER_ADDRESS value: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0.llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn.kserve-ci-e2e-test - name: LWS_GROUP_SIZE value: "2" - name: LWS_WORKER_INDEX value: "1" - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: INFO - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent name: main ports: - containerPort: 8000 protocol: TCP resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: add: - IPC_LOCK - SYS_RAWIO - NET_RAW drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000690000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t2h64 readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true hostname: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0-1 imagePullSecrets: - name: default-dockercfg-qjhlk - name: llmisvc-model-fb-opt-125m-route-f312f5-cb7fb8cf-dockercfg-ffgv5 initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: LWS_LEADER_ADDRESS value: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn-0.llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn.kserve-ci-e2e-test - name: LWS_GROUP_SIZE value: "2" - name: LWS_WORKER_INDEX value: "1" - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:30bffa55f5e3e0e2f6d023c0f84846c206091122d724f5a0e810b4fb957dd389 imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000690000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t2h64 readOnly: true nodeName: ip-10-0-136-225.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: seLinuxOptions: level: s0:c26,c20 seccompProfile: type: RuntimeDefault serviceAccount: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn serviceAccountName: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn subdomain: llmisvc-model-fb-opt-125m-route-f312f5ec-kserve-mn terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: {} name: tmp-dir - emptyDir: medium: Memory sizeLimit: 8Gi name: dshm - emptyDir: {} name: model-cache - name: tls-certs secret: defaultMode: 420 secretName: llmisv4e643bc258191ffc517a31cd1d0ddd27-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-t2h64 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-05-26T12:14:30Z" status: "False" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-05-26T12:12:18Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-05-26T12:14:30Z" reason: PodFailed status: "False" type: Ready - lastProbeTime: null lastTransitionTime: "2026-05-26T12:14:30Z" reason: PodFailed status: "False" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-05-26T12:11:18Z" status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://833e65696cad7ca68a5ea0542f5d65a5936143f6e8d19a5d6f33c44b133e78af image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:afb39fca138b51d019d986229d546531b45a2a3deb73bcf59bd42406e13fbba0 lastState: {} name: main ready: false resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: false state: terminated: containerID: cri-o://833e65696cad7ca68a5ea0542f5d65a5936143f6e8d19a5d6f33c44b133e78af exitCode: 137 finishedAt: "2026-05-26T12:14:30Z" message: | : GET (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /metrics, Methods: GET (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/models, Methods: GET (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /ping, Methods: GET (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /ping, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /invocations, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/chat/completions, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/chat/completions/batch, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/responses, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/responses/{response_id}, Methods: GET (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/responses/{response_id}/cancel, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/completions, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/messages, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/messages/count_tokens, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /inference/v1/generate, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /scale_elastic_ep, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /is_scaling_elastic_ep, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/chat/completions/render, Methods: POST (APIServer pid=1) INFO 05-26 12:14:23 [launcher.py:46] Route: /v1/completions/render, Methods: POST (APIServer pid=1) INFO: Started server process [1] (APIServer pid=1) INFO: Waiting for application startup. (APIServer pid=1) INFO 05-26 12:14:23 [ssl.py:60] SSLCertRefresher monitors files: ['/var/run/kserve/tls/tls.key', '/var/run/kserve/tls/tls.crt'] (APIServer pid=1) INFO: Application startup complete. reason: Error startedAt: "2026-05-26T12:12:46Z" user: linux: gid: 0 supplementalGroups: - 0 uid: 1000690000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t2h64 readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.136.225 hostIPs: - ip: 10.0.136.225 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://3528440828a0fbe65d29b8ff508a98da4e46feab139768be2c9005c70a64793f image: quay.io/opendatahub/kserve-storage-initializer@sha256:30bffa55f5e3e0e2f6d023c0f84846c206091122d724f5a0e810b4fb957dd389 imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:30bffa55f5e3e0e2f6d023c0f84846c206091122d724f5a0e810b4fb957dd389 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://3528440828a0fbe65d29b8ff508a98da4e46feab139768be2c9005c70a64793f exitCode: 0 finishedAt: "2026-05-26T12:12:18Z" reason: Completed startedAt: "2026-05-26T12:11:23Z" user: linux: gid: 0 supplementalGroups: - 0 uid: 1000690000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t2h64 readOnly: true recursiveReadOnly: Disabled phase: Failed podIP: 10.133.0.47 podIPs: - ip: 10.133.0.47 qosClass: Burstable startTime: "2026-05-26T12:11:18Z"