--- apiVersion: v1 items: - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.35/23"],"mac_address":"0a:58:0a:85:00:23","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.35/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.35" ], "mac": "0a:58:0a:85:00:23", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:03:11Z" generateName: auth-enabled-test-kserve-85d86d876c- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: auth-enabled-test app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: both pod-template-hash: 85d86d876c managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-141-25 operation: Update subresource: status time: "2026-06-15T06:03:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"afbab033-b48f-4827-a683-4e1cc3932d27"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:03:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:03:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:message: {} f:observedGeneration: {} f:reason: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:message: {} f:observedGeneration: {} f:reason: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:message: {} f:observedGeneration: {} f:reason: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.35"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:03:16Z" name: auth-enabled-test-kserve-85d86d876c-vrqhw namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: auth-enabled-test-kserve-85d86d876c uid: afbab033-b48f-4827-a683-4e1cc3932d27 resourceVersion: "24347" uid: 03be4983-7e0f-4763-b9f9-537fa20b4610 spec: containers: - command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011). # Older versions still need the blanket --disable-uvicorn-access-log. ACCESS_LOG_ARGS="--disable-uvicorn-access-log" VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}') echo "[access-log-detect] vllm version='${VLLM_VERSION}'" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping" fi echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'" # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666). SHUTDOWN_TIMEOUT_ARGS="" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40" fi eval "exec vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \ --port 8000 \ ${ACCESS_LOG_ARGS} \ ${SHUTDOWN_TIMEOUT_ARGS} \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: VLLM_ENABLE_V1_MULTIPROCESSING value: "0" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-d2vjn readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-d2vjn readOnly: true nodeName: ip-10-0-141-25.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - emptyDir: {} name: tmp-dir - name: tls-certs secret: defaultMode: 420 secretName: auth-enabled-test-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-d2vjn projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:16Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:11Z" message: 'containers with incomplete status: [storage-initializer]' observedGeneration: 1 reason: ContainersNotInitialized status: "False" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:11Z" message: 'containers with unready status: [main]' observedGeneration: 1 reason: ContainersNotReady status: "False" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:11Z" message: 'containers with unready status: [main]' observedGeneration: 1 reason: ContainersNotReady status: "False" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:11Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: "" lastState: {} name: main ready: false restartCount: 0 started: false state: waiting: reason: PodInitializing volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-d2vjn readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.141.25 hostIPs: - ip: 10.0.141.25 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://8b4c36867ef169438602cb7c2bd89a577c73c2c0994c64f9ca6b693147449951 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: false resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:03:15Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-d2vjn readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Pending podIP: 10.133.0.35 podIPs: - ip: 10.133.0.35 qosClass: Burstable startTime: "2026-06-15T06:03:11Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.31/23"],"mac_address":"0a:58:0a:86:00:1f","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.31/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.31" ], "mac": "0a:58:0a:86:00:1f", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:03:11Z" generateName: auth-enabled-test-kserve-router-scheduler-6c5d597fbb- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: auth-enabled-test app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 6c5d597fbb managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:03:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"eedb0e24-1784-41de-852e-01b08abb9f57"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models/base"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} k:{"name":"STORAGE_ALLOW_PATTERNS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:03:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:03:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.31"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:03:43Z" name: auth-enabled-test-kserve-router-scheduler-6c5d597fbb-nhwh9 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: auth-enabled-test-kserve-router-scheduler-6c5d597fbb uid: eedb0e24-1784-41de-852e-01b08abb9f57 resourceVersion: "24854" uid: 9359d817-81c6-4fe2-80e0-76d36607616d spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - auth-enabled-test-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h88n7 readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h88n7 readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: auth-enabled-test-epp-sa-dockercfg-dz9xz initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: STORAGE_ALLOW_PATTERNS value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json", "vocab.json", "merges.txt", "config.json", "generation_config.json"]' image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h88n7 readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: auth-enabled-test-epp-sa serviceAccountName: auth-enabled-test-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: auth-enabled-test-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - emptyDir: {} name: kserve-provision-location - name: kube-api-access-h88n7 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:12Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:13Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:43Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:43Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:03:11Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://f17a74289ac80e1118204b380d94d45d17e4d27afd28e9cb9557783dbe08aeef image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:03:13Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h88n7 readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://d98757a96b0a8e28ff609c63ebd79b7448768eeb80b847f7b2eaf808789a37ce image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:03:14Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h88n7 readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://55bcaeaed2a108002a4cc5fd3a43c9f6c911f9361281b7d4e5a71122e0d9f91c image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://55bcaeaed2a108002a4cc5fd3a43c9f6c911f9361281b7d4e5a71122e0d9f91c exitCode: 0 finishedAt: "2026-06-15T06:03:12Z" reason: Completed startedAt: "2026-06-15T06:03:12Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h88n7 readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.134.0.31 podIPs: - ip: 10.134.0.31 qosClass: Burstable startTime: "2026-06-15T06:03:11Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.43/23"],"mac_address":"0a:58:0a:86:00:2b","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.43/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.43" ], "mac": "0a:58:0a:86:00:2b", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:45:13Z" generateName: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-6dbc7ddb8d- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146 app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: both pod-template-hash: 6dbc7ddb8d managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:45:13Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"2a8b4d6b-21fa-4755-bad0-3501d8b92cfb"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"CA_BUNDLE_CONFIGMAP_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"CA_BUNDLE_VOLUME_MOUNT_POINT"}: .: {} f:name: {} f:value: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/ssl/custom-certs"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/mnt"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"cabundle-cert"}: .: {} f:configMap: .: {} f:defaultMode: {} f:name: {} f:name: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:45:13Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:message: {} f:observedGeneration: {} f:reason: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:message: {} f:observedGeneration: {} f:reason: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:message: {} f:observedGeneration: {} f:reason: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.43"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:45:14Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:45:14Z" name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-6dbc7ddb8d-c625w namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-6dbc7ddb8d uid: 2a8b4d6b-21fa-4755-bad0-3501d8b92cfb resourceVersion: "53991" uid: 2bcc7d6e-7945-4c51-910d-4a0ffe93df64 spec: containers: - args: - --enable-lora - --lora-modules - '''{"name":"lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}''' - '''{"name":"publishers/kserve-ci-e2e-test/models/lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}''' command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011). # Older versions still need the blanket --disable-uvicorn-access-log. ACCESS_LOG_ARGS="--disable-uvicorn-access-log" VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}') echo "[access-log-detect] vllm version='${VLLM_VERSION}'" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping" fi echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'" # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666). SHUTDOWN_TIMEOUT_ARGS="" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40" fi eval "exec vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \ --port 8000 \ ${ACCESS_LOG_ARGS} \ ${SHUTDOWN_TIMEOUT_ARGS} \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: VLLM_ENABLE_V1_MULTIPROCESSING value: "0" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-pq8qp readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp initContainers: - args: - hf://facebook/opt-125m - /mnt/models - hf://edbeeching/opt-125m-lora - /mnt/lora/lora-adapter-1 env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: CA_BUNDLE_CONFIGMAP_NAME value: odh-kserve-custom-ca-bundle - name: CA_BUNDLE_VOLUME_MOUNT_POINT value: /etc/ssl/custom-certs image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt name: kserve-provision-location - mountPath: /etc/ssl/custom-certs name: cabundle-cert readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-pq8qp readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - emptyDir: {} name: tmp-dir - name: tls-certs secret: defaultMode: 420 secretName: llmisv3e414c2ba058a022dfd694dbcbac5b51-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - configMap: defaultMode: 420 name: odh-kserve-custom-ca-bundle name: cabundle-cert - name: kube-api-access-pq8qp projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:14Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:13Z" message: 'containers with incomplete status: [storage-initializer]' observedGeneration: 1 reason: ContainersNotInitialized status: "False" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:13Z" message: 'containers with unready status: [main]' observedGeneration: 1 reason: ContainersNotReady status: "False" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:13Z" message: 'containers with unready status: [main]' observedGeneration: 1 reason: ContainersNotReady status: "False" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:13Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: "" lastState: {} name: main ready: false restartCount: 0 started: false state: waiting: reason: PodInitializing volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-pq8qp readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://47a49ea18aded36d93af1bed5e257738fc523bad3897e0d9cb2050d828fda45d image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: false resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:45:14Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt name: kserve-provision-location - mountPath: /etc/ssl/custom-certs name: cabundle-cert readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-pq8qp readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Pending podIP: 10.134.0.43 podIPs: - ip: 10.134.0.43 qosClass: Burstable startTime: "2026-06-15T06:45:13Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.42/23"],"mac_address":"0a:58:0a:85:00:2a","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.42/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.42" ], "mac": "0a:58:0a:85:00:2a", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:45:13Z" generateName: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-router-scheduler-7cdd64995b- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146 app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 7cdd64995b managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-141-25 operation: Update subresource: status time: "2026-06-15T06:45:13Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"fcaa5ab9-e3b2-4235-bd26-1a7bd182a576"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models/base"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} k:{"name":"STORAGE_ALLOW_PATTERNS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:45:13Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:45:14Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.42"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:45:45Z" name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-router-schengntw namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-router-scheduler-7cdd64995b uid: fcaa5ab9-e3b2-4235-bd26-1a7bd182a576 resourceVersion: "54710" uid: d15a90cc-1bc7-4614-9bf0-26231da0604c spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - llmisvc-model-fb-opt-125m-with-7ca60146-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwz6g readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwz6g readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: llmisvc-model-fb-opt-125m-with-7ca60146-epp-sa-dockercfg-sd6cp initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: STORAGE_ALLOW_PATTERNS value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json", "vocab.json", "merges.txt", "config.json", "generation_config.json"]' image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwz6g readOnly: true nodeName: ip-10-0-141-25.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: llmisvc-model-fb-opt-125m-with-7ca60146-epp-sa serviceAccountName: llmisvc-model-fb-opt-125m-with-7ca60146-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: llmisv3e414c2ba058a022dfd694dbcbac5b51-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - emptyDir: {} name: kserve-provision-location - name: kube-api-access-xwz6g projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:14Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:15Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:45Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:45Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:45:13Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://7d276bd5f209a7e7c71ba25011b21543dcc017333731b89982cb26b3bdfdd24e image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:45:15Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwz6g readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://34fb32fa06720a4f94fef5476a3ceabdbbd1523fa4b06331251b4b32b4151219 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:45:16Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwz6g readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.141.25 hostIPs: - ip: 10.0.141.25 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://81f8edcb9db283dbcea930e9d7dbde3e12ab040f916194724bdf612f9a6f3119 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://81f8edcb9db283dbcea930e9d7dbde3e12ab040f916194724bdf612f9a6f3119 exitCode: 0 finishedAt: "2026-06-15T06:45:15Z" reason: Completed startedAt: "2026-06-15T06:45:14Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-xwz6g readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.133.0.42 podIPs: - ip: 10.133.0.42 qosClass: Burstable startTime: "2026-06-15T06:45:13Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.50/23"],"mac_address":"0a:58:0a:86:00:32","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.50/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.50" ], "mac": "0a:58:0a:86:00:32", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:59:59Z" generateName: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-766cc944c5- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: both pod-template-hash: 766cc944c5 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:59:59Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"32f9748e-052a-43d4-bd64-34af57930dcb"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"CA_BUNDLE_CONFIGMAP_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"CA_BUNDLE_VOLUME_MOUNT_POINT"}: .: {} f:name: {} f:value: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/ssl/custom-certs"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/mnt"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"cabundle-cert"}: .: {} f:configMap: .: {} f:defaultMode: {} f:name: {} f:name: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:59:59Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:59:59Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.50"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T07:01:49Z" name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-766cc944c5-85gl8 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-766cc944c5 uid: 32f9748e-052a-43d4-bd64-34af57930dcb resourceVersion: "67992" uid: 8572c5e6-3714-4917-b782-31ec52c11134 spec: containers: - args: - --enable-lora - --lora-modules - '''{"name":"lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}''' - '''{"name":"publishers/kserve-ci-e2e-test/models/lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}''' command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011). # Older versions still need the blanket --disable-uvicorn-access-log. ACCESS_LOG_ARGS="--disable-uvicorn-access-log" VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}') echo "[access-log-detect] vllm version='${VLLM_VERSION}'" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping" fi echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'" # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666). SHUTDOWN_TIMEOUT_ARGS="" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40" fi eval "exec vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \ --port 8000 \ ${ACCESS_LOG_ARGS} \ ${SHUTDOWN_TIMEOUT_ARGS} \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: VLLM_ENABLE_V1_MULTIPROCESSING value: "0" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-9mk6p readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp initContainers: - args: - hf://facebook/opt-125m - /mnt/models - hf://edbeeching/opt-125m-lora - /mnt/lora/lora-adapter-1 env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: CA_BUNDLE_CONFIGMAP_NAME value: odh-kserve-custom-ca-bundle - name: CA_BUNDLE_VOLUME_MOUNT_POINT value: /etc/ssl/custom-certs image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt name: kserve-provision-location - mountPath: /etc/ssl/custom-certs name: cabundle-cert readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-9mk6p readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - emptyDir: {} name: tmp-dir - name: tls-certs secret: defaultMode: 420 secretName: llmisv77ff2528d3e9b4972cd9335229fce9f0-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - configMap: defaultMode: 420 name: odh-kserve-custom-ca-bundle name: cabundle-cert - name: kube-api-access-9mk6p projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T07:00:00Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T07:00:24Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T07:01:49Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T07:01:49Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:59:59Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://7e2a1e01bf22fe3caafc6483f1651317c4ec17ef08f1dcde0005e59880e149d0 image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:afb39fca138b51d019d986229d546531b45a2a3deb73bcf59bd42406e13fbba0 lastState: {} name: main ready: true resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-06-15T07:00:24Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-9mk6p readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://e4b50a533016f1dcb35821769f2f190ad25dc25c902839f4a374e6efec3cd120 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://e4b50a533016f1dcb35821769f2f190ad25dc25c902839f4a374e6efec3cd120 exitCode: 0 finishedAt: "2026-06-15T07:00:24Z" reason: Completed startedAt: "2026-06-15T07:00:00Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt name: kserve-provision-location - mountPath: /etc/ssl/custom-certs name: cabundle-cert readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-9mk6p readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.134.0.50 podIPs: - ip: 10.134.0.50 qosClass: Burstable startTime: "2026-06-15T06:59:59Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.46/23"],"mac_address":"0a:58:0a:85:00:2e","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.46/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.46" ], "mac": "0a:58:0a:85:00:2e", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:59:59Z" generateName: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-router-scheduler-86f69d9999- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 86f69d9999 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-141-25 operation: Update subresource: status time: "2026-06-15T06:59:59Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"c7ec8574-3741-403b-a777-99db38917f8f"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models/base"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} k:{"name":"STORAGE_ALLOW_PATTERNS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:59:59Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T07:00:00Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.46"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T07:00:31Z" name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-router-schest4hz namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-router-scheduler-86f69d9999 uid: c7ec8574-3741-403b-a777-99db38917f8f resourceVersion: "66953" uid: d2addb78-1f4e-4983-8983-218180355838 spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - llmisvc-model-fb-opt-125m-with-ba4d693a-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5kcg6 readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5kcg6 readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: llmisvc-model-fb-opt-125m-with-ba4d693a-epp-sa-dockercfg-9nvdk initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: STORAGE_ALLOW_PATTERNS value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json", "vocab.json", "merges.txt", "config.json", "generation_config.json"]' image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5kcg6 readOnly: true nodeName: ip-10-0-141-25.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: llmisvc-model-fb-opt-125m-with-ba4d693a-epp-sa serviceAccountName: llmisvc-model-fb-opt-125m-with-ba4d693a-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: llmisv77ff2528d3e9b4972cd9335229fce9f0-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - emptyDir: {} name: kserve-provision-location - name: kube-api-access-5kcg6 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T07:00:00Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T07:00:01Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T07:00:31Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T07:00:31Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:59:59Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://38a75b8b7c9c2a6bee7e5522f89ae945d6a240936f3b0b2b060859d029ba1613 image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T07:00:01Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5kcg6 readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://cb308918fa53aef9fd362cfb693f2ceb35813fe1459d600b38914cb5df0685c4 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T07:00:01Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5kcg6 readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.141.25 hostIPs: - ip: 10.0.141.25 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://b08e52af2d09e251985984fb269bc1130bce1d7700bf5a1263c10e91787b198b image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://b08e52af2d09e251985984fb269bc1130bce1d7700bf5a1263c10e91787b198b exitCode: 0 finishedAt: "2026-06-15T07:00:01Z" reason: Completed startedAt: "2026-06-15T07:00:00Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-5kcg6 readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.133.0.46 podIPs: - ip: 10.133.0.46 qosClass: Burstable startTime: "2026-06-15T06:59:59Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.132.0.47/23"],"mac_address":"0a:58:0a:84:00:2f","gateway_ips":["10.132.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.132.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.132.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.132.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.132.0.1"}],"ip_address":"10.132.0.47/23","gateway_ip":"10.132.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.132.0.47" ], "mac": "0a:58:0a:84:00:2f", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:25:41Z" generateName: llmisvc-router-managed-test-llm-4b931143-kserve-66f88bc44d- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143 app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: both pod-template-hash: 66f88bc44d managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-243 operation: Update subresource: status time: "2026-06-15T06:25:41Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"3b204cae-4fb7-4b0e-a6e5-c2274d4de174"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:25:41Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:25:42Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.132.0.47"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:25:52Z" name: llmisvc-router-managed-test-llm-4b931143-kserve-66f88bc44dcnc5x namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvc-router-managed-test-llm-4b931143-kserve-66f88bc44d uid: 3b204cae-4fb7-4b0e-a6e5-c2274d4de174 resourceVersion: "41062" uid: d0c46224-53bd-4494-b0c7-835a49a21cf7 spec: containers: - args: - --port - "8000" - --model - facebook/opt-125m - --mode - random - --ssl-certfile - /var/run/kserve/tls/tls.crt - --ssl-keyfile - /var/run/kserve/tls/tls.key command: - /app/llm-d-inference-sim env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: INFO - name: HF_HUB_CACHE value: /models image: ghcr.io/llm-d/llm-d-inference-sim:v0.8.2 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "1" memory: 2Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-vl9lq readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp nodeName: ip-10-0-128-243.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - emptyDir: {} name: tmp-dir - name: tls-certs secret: defaultMode: 420 secretName: llmisvca2d2d7d499abb359505529ebe02c136-kserve-self-signed-certs - name: kube-api-access-vl9lq projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:43Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:41Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:52Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:52Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:41Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://32a6110a7b59885b076d50ddc1d5fbec6164561b724352dbf2a3ef11973d7444 image: ghcr.io/llm-d/llm-d-inference-sim:v0.8.2 imageID: ghcr.io/llm-d/llm-d-inference-sim@sha256:bab162bd25e2ed8b15022387cdb223023aeb33be49476af9f0115c0398fb8ff5 lastState: {} name: main ready: true resources: limits: cpu: "1" memory: 2Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:25:42Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-vl9lq readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.243 hostIPs: - ip: 10.0.128.243 observedGeneration: 1 phase: Running podIP: 10.132.0.47 podIPs: - ip: 10.132.0.47 qosClass: Burstable startTime: "2026-06-15T06:25:41Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.132.0.45/23"],"mac_address":"0a:58:0a:84:00:2d","gateway_ips":["10.132.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.132.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.132.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.132.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.132.0.1"}],"ip_address":"10.132.0.45/23","gateway_ip":"10.132.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.132.0.45" ], "mac": "0a:58:0a:84:00:2d", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:06:39Z" generateName: llmisvc-router-managed-test-llm-5b1e8f15-kserve-7c5bd57d44- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15 app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: both pod-template-hash: 7c5bd57d44 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-243 operation: Update subresource: status time: "2026-06-15T06:06:39Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"b6522ee7-6b00-47fc-8f8e-7b5f486604f6"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:06:39Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:06:39Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.132.0.45"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:06:49Z" name: llmisvc-router-managed-test-llm-5b1e8f15-kserve-7c5bd57d44b7wp8 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvc-router-managed-test-llm-5b1e8f15-kserve-7c5bd57d44 uid: b6522ee7-6b00-47fc-8f8e-7b5f486604f6 resourceVersion: "27608" uid: be8770ba-8db8-4a0c-99b7-a16c4f499392 spec: containers: - args: - --port - "8000" - --model - facebook/opt-125m - --mode - random - --ssl-certfile - /var/run/kserve/tls/tls.crt - --ssl-keyfile - /var/run/kserve/tls/tls.key command: - /app/llm-d-inference-sim env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: INFO - name: HF_HUB_CACHE value: /models image: ghcr.io/llm-d/llm-d-inference-sim:v0.8.2 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "1" memory: 2Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-82nwf readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp nodeName: ip-10-0-128-243.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - emptyDir: {} name: tmp-dir - name: tls-certs secret: defaultMode: 420 secretName: llmisve55ae740357a3a31a27cdb8b66ffe20f-kserve-self-signed-certs - name: kube-api-access-82nwf projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:40Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:39Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:49Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:49Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:39Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://cfc86b1647130920b0fede877a2100ac2a0ef189a788249428d22f85aa834abd image: ghcr.io/llm-d/llm-d-inference-sim:v0.8.2 imageID: ghcr.io/llm-d/llm-d-inference-sim@sha256:bab162bd25e2ed8b15022387cdb223023aeb33be49476af9f0115c0398fb8ff5 lastState: {} name: main ready: true resources: limits: cpu: "1" memory: 2Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:06:39Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-82nwf readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.243 hostIPs: - ip: 10.0.128.243 observedGeneration: 1 phase: Running podIP: 10.132.0.45 podIPs: - ip: 10.132.0.45 qosClass: Burstable startTime: "2026-06-15T06:06:39Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.41/23"],"mac_address":"0a:58:0a:85:00:29","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.41/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.41" ], "mac": "0a:58:0a:85:00:29", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:25:41Z" generateName: llmisvcca2d2d7d499abb359505529ebe02c136-kserve-router-scheduler-5597d7fd6- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143 app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 5597d7fd6 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-141-25 operation: Update subresource: status time: "2026-06-15T06:25:41Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"d87575c6-df17-4fb6-b992-6d245459cce1"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:25:41Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:25:42Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.41"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:26:14Z" name: llmisvcca2d2d7d499abb359505529ebe02c136-kserve-router-schevgktk namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvcca2d2d7d499abb359505529ebe02c136-kserve-router-scheduler-5597d7fd6 uid: d87575c6-df17-4fb6-b992-6d245459cce1 resourceVersion: "41335" uid: 8e1b69b4-ea1b-42c3-9cb9-591db3c02643 spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - llmisvc-router-managed-test-llm-4b931143-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-8t9fg readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-8t9fg readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: llmisvc-router-managed-test-llm-4b931143-epp-sa-dockercfg-rxpbg nodeName: ip-10-0-141-25.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: llmisvc-router-managed-test-llm-4b931143-epp-sa serviceAccountName: llmisvc-router-managed-test-llm-4b931143-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: llmisvca2d2d7d499abb359505529ebe02c136-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - name: kube-api-access-8t9fg projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:43Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:41Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:26:14Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:26:14Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:25:41Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://547f599db73de1ca1ad8af0e4f755607186abe4236f58063abf49196fa6337e3 image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:25:42Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-8t9fg readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://0027d74da24955b56dcac9ad15899c3fafd670b7523765b2f48f0c7501b244b3 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:25:42Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-8t9fg readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.141.25 hostIPs: - ip: 10.0.141.25 observedGeneration: 1 phase: Running podIP: 10.133.0.41 podIPs: - ip: 10.133.0.41 qosClass: Burstable startTime: "2026-06-15T06:25:41Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.40/23"],"mac_address":"0a:58:0a:85:00:28","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.40/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.40" ], "mac": "0a:58:0a:85:00:28", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:06:39Z" generateName: llmisvce55ae740357a3a31a27cdb8b66ffe20f-kserve-router-scheduler-68b6785c7d- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15 app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 68b6785c7d managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-141-25 operation: Update subresource: status time: "2026-06-15T06:06:39Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"0a54deb4-75ba-4b8b-8fb7-4a4fe96d4f69"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:06:39Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:06:39Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.40"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:07:11Z" name: llmisvce55ae740357a3a31a27cdb8b66ffe20f-kserve-router-sche8fvcj namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: llmisvce55ae740357a3a31a27cdb8b66ffe20f-kserve-router-scheduler-68b6785c7d uid: 0a54deb4-75ba-4b8b-8fb7-4a4fe96d4f69 resourceVersion: "27852" uid: 1264e5f9-09ea-4712-8848-1be6c22009fa spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - llmisvc-router-managed-test-llm-5b1e8f15-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-7jg2d readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-7jg2d readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: llmisvc-router-managed-test-llm-5b1e8f15-epp-sa-dockercfg-rx77b nodeName: ip-10-0-141-25.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: llmisvc-router-managed-test-llm-5b1e8f15-epp-sa serviceAccountName: llmisvc-router-managed-test-llm-5b1e8f15-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: llmisve55ae740357a3a31a27cdb8b66ffe20f-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - name: kube-api-access-7jg2d projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:40Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:39Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:07:11Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:07:11Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:06:39Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://c2d6e31fd4caa01e365ddd5e74ad16e61669ffc6a74474a0486fcc133a9a87b9 image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:06:39Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-7jg2d readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://f57a309a3b8c5c8eee249eff5a705a9ebacae81579281f80635f6407af08c507 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:06:39Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-7jg2d readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.141.25 hostIPs: - ip: 10.0.141.25 observedGeneration: 1 phase: Running podIP: 10.133.0.40 podIPs: - ip: 10.133.0.40 qosClass: Burstable startTime: "2026-06-15T06:06:39Z" - apiVersion: v1 kind: Pod metadata: annotations: istio.io/rev: openshift-gateway k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.28/23"],"mac_address":"0a:58:0a:86:00:1c","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.28/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.28" ], "mac": "0a:58:0a:86:00:1c", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 prometheus.io/path: /stats/prometheus prometheus.io/port: "15020" prometheus.io/scrape: "true" seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:01:44Z" generateName: router-gateway-1-openshift-default-75dcfd69c9- generation: 1 labels: gateway.istio.io/managed: istio.io-gateway-controller gateway.networking.k8s.io/gateway-name: router-gateway-1 pod-template-hash: 75dcfd69c9 service.istio.io/canonical-name: router-gateway-1-openshift-default service.istio.io/canonical-revision: latest sidecar.istio.io/inject: "false" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:01:44Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:istio.io/rev: {} f:prometheus.io/path: {} f:prometheus.io/port: {} f:prometheus.io/scrape: {} f:generateName: {} f:labels: .: {} f:gateway.istio.io/managed: {} f:gateway.networking.k8s.io/gateway-name: {} f:pod-template-hash: {} f:service.istio.io/canonical-name: {} f:service.istio.io/canonical-revision: {} f:sidecar.istio.io/inject: {} f:ownerReferences: .: {} k:{"uid":"146ffe43-5265-40ae-82ba-6fab35f508a9"}: {} f:spec: f:containers: k:{"name":"istio-proxy"}: .: {} f:args: {} f:env: .: {} k:{"name":"CA_ADDR"}: .: {} f:name: {} f:value: {} k:{"name":"GOMAXPROCS"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"GOMEMLIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"HOST_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"INSTANCE_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_CPU_LIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"ISTIO_META_APP_CONTAINERS"}: .: {} f:name: {} k:{"name":"ISTIO_META_CLUSTER_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_INTERCEPTION_MODE"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_MESH_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_NODE_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_META_OWNER"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_POD_PORTS"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_WORKLOAD_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"PILOT_CERT_PROVIDER"}: .: {} f:name: {} f:value: {} k:{"name":"POD_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"POD_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"PROXY_CONFIG"}: .: {} f:name: {} f:value: {} k:{"name":"SERVICE_ACCOUNT"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"TRUST_DOMAIN"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:ports: .: {} k:{"containerPort":15020,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15021,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:privileged: {} f:readOnlyRootFilesystem: {} f:runAsGroup: {} f:runAsNonRoot: {} f:runAsUser: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/istio/pod"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/etc/istio/proxy"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/lib/istio/data"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/credential-uds"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/istio"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/tokens"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-credentials"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-uds"}: .: {} f:mountPath: {} f:name: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: .: {} f:sysctls: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"credential-socket"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-data"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-envoy"}: .: {} f:emptyDir: .: {} f:medium: {} f:name: {} k:{"name":"istio-podinfo"}: .: {} f:downwardAPI: .: {} f:defaultMode: {} f:items: {} f:name: {} k:{"name":"istio-token"}: .: {} f:name: {} f:projected: .: {} f:defaultMode: {} f:sources: {} k:{"name":"istiod-ca-cert"}: .: {} f:configMap: .: {} f:defaultMode: {} f:name: {} f:name: {} k:{"name":"workload-certs"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"workload-socket"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:01:44Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:01:45Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.28"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:02:34Z" name: router-gateway-1-openshift-default-75dcfd69c9-dh6qf namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-gateway-1-openshift-default-75dcfd69c9 uid: 146ffe43-5265-40ae-82ba-6fab35f508a9 resourceVersion: "23486" uid: e8405d0a-7af6-4f75-9d69-2a63b8579195 spec: containers: - args: - proxy - router - --domain - $(POD_NAMESPACE).svc.cluster.local - --proxyLogLevel - warning - --proxyComponentLogLevel - misc:error - --log_output_level - default:info env: - name: PILOT_CERT_PROVIDER value: istiod - name: CA_ADDR value: istiod-openshift-gateway.openshift-ingress.svc:15012 - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP - name: SERVICE_ACCOUNT valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.serviceAccountName - name: HOST_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.hostIP - name: ISTIO_CPU_LIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: PROXY_CONFIG value: | {"discoveryAddress":"istiod-openshift-gateway.openshift-ingress.svc:15012","proxyHeaders":{"server":{"disabled":true},"envoyDebugHeaders":{"disabled":true},"metadataExchangeHeaders":{"mode":"IN_MESH"}}} - name: ISTIO_META_POD_PORTS value: '[]' - name: ISTIO_META_APP_CONTAINERS - name: GOMEMLIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.memory - name: GOMAXPROCS valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: ISTIO_META_CLUSTER_ID value: Kubernetes - name: ISTIO_META_NODE_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.nodeName - name: ISTIO_META_INTERCEPTION_MODE value: REDIRECT - name: ISTIO_META_WORKLOAD_NAME value: router-gateway-1-openshift-default - name: ISTIO_META_OWNER value: kubernetes://apis/apps/v1/namespaces/kserve-ci-e2e-test/deployments/router-gateway-1-openshift-default - name: ISTIO_META_MESH_ID value: cluster.local - name: TRUST_DOMAIN value: cluster.local image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371 imagePullPolicy: IfNotPresent name: istio-proxy ports: - containerPort: 15020 name: metrics protocol: TCP - containerPort: 15021 name: status-port protocol: TCP - containerPort: 15090 name: http-envoy-prom protocol: TCP readinessProbe: failureThreshold: 4 httpGet: path: /healthz/ready port: 15021 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL privileged: false readOnlyRootFilesystem: true runAsGroup: 1000709999 runAsNonRoot: true runAsUser: 1000709999 startupProbe: failureThreshold: 30 httpGet: path: /healthz/ready port: 15021 scheme: HTTP initialDelaySeconds: 1 periodSeconds: 1 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-gh29q readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: router-gateway-1-openshift-default-dockercfg-79qdt nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault sysctls: - name: net.ipv4.ip_unprivileged_port_start value: "0" serviceAccount: router-gateway-1-openshift-default serviceAccountName: router-gateway-1-openshift-default terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: workload-socket - emptyDir: {} name: credential-socket - emptyDir: {} name: workload-certs - emptyDir: medium: Memory name: istio-envoy - emptyDir: {} name: istio-data - downwardAPI: defaultMode: 420 items: - fieldRef: apiVersion: v1 fieldPath: metadata.labels path: labels - fieldRef: apiVersion: v1 fieldPath: metadata.annotations path: annotations name: istio-podinfo - name: istio-token projected: defaultMode: 420 sources: - serviceAccountToken: audience: istio-ca expirationSeconds: 43200 path: istio-token - configMap: defaultMode: 420 name: openshift-gw-ca-root-cert name: istiod-ca-cert - name: kube-api-access-gh29q projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:01:48Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:01:44Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:02:34Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:02:34Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:01:44Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 100m memory: 128Mi containerID: cri-o://cb7cfe81481c68cb690261e8074283fdabc51ada243b8bad23f546c4dcf208bf image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371 imageID: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371 lastState: terminated: containerID: cri-o://faa715e63cf60dc52c97a5dd0eb4edf9de7769d6139b4438d59a54d813cf808e exitCode: 0 finishedAt: "2026-06-15T06:02:27Z" reason: Completed startedAt: "2026-06-15T06:01:48Z" name: istio-proxy ready: true resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi restartCount: 1 started: true state: running: startedAt: "2026-06-15T06:02:27Z" user: linux: gid: 1000709999 supplementalGroups: - 1000709999 - 1000700000 uid: 1000709999 volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-gh29q readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 observedGeneration: 1 phase: Running podIP: 10.134.0.28 podIPs: - ip: 10.134.0.28 qosClass: Burstable startTime: "2026-06-15T06:01:44Z" - apiVersion: v1 kind: Pod metadata: annotations: istio.io/rev: openshift-gateway k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.132.0.48/23"],"mac_address":"0a:58:0a:84:00:30","gateway_ips":["10.132.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.132.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.132.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.132.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.132.0.1"}],"ip_address":"10.132.0.48/23","gateway_ip":"10.132.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.132.0.48" ], "mac": "0a:58:0a:84:00:30", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 prometheus.io/path: /stats/prometheus prometheus.io/port: "15020" prometheus.io/scrape: "true" seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:52:11Z" generateName: router-gateway-2-openshift-default-78c98f6f4c- generation: 1 labels: gateway.istio.io/managed: istio.io-gateway-controller gateway.networking.k8s.io/gateway-name: router-gateway-2 pod-template-hash: 78c98f6f4c service.istio.io/canonical-name: router-gateway-2-openshift-default service.istio.io/canonical-revision: latest sidecar.istio.io/inject: "false" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-243 operation: Update subresource: status time: "2026-06-15T06:52:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:istio.io/rev: {} f:prometheus.io/path: {} f:prometheus.io/port: {} f:prometheus.io/scrape: {} f:generateName: {} f:labels: .: {} f:gateway.istio.io/managed: {} f:gateway.networking.k8s.io/gateway-name: {} f:pod-template-hash: {} f:service.istio.io/canonical-name: {} f:service.istio.io/canonical-revision: {} f:sidecar.istio.io/inject: {} f:ownerReferences: .: {} k:{"uid":"8ea3db84-7947-45ae-8a53-0cf00386234c"}: {} f:spec: f:containers: k:{"name":"istio-proxy"}: .: {} f:args: {} f:env: .: {} k:{"name":"CA_ADDR"}: .: {} f:name: {} f:value: {} k:{"name":"GOMAXPROCS"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"GOMEMLIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"HOST_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"INSTANCE_IP"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_CPU_LIMIT"}: .: {} f:name: {} f:valueFrom: .: {} f:resourceFieldRef: {} k:{"name":"ISTIO_META_APP_CONTAINERS"}: .: {} f:name: {} k:{"name":"ISTIO_META_CLUSTER_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_INTERCEPTION_MODE"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_MESH_ID"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_NODE_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"ISTIO_META_OWNER"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_POD_PORTS"}: .: {} f:name: {} f:value: {} k:{"name":"ISTIO_META_WORKLOAD_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"PILOT_CERT_PROVIDER"}: .: {} f:name: {} f:value: {} k:{"name":"POD_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"POD_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"PROXY_CONFIG"}: .: {} f:name: {} f:value: {} k:{"name":"SERVICE_ACCOUNT"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"TRUST_DOMAIN"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:ports: .: {} k:{"containerPort":15020,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15021,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":15090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:privileged: {} f:readOnlyRootFilesystem: {} f:runAsGroup: {} f:runAsNonRoot: {} f:runAsUser: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/istio/pod"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/etc/istio/proxy"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/lib/istio/data"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/credential-uds"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/istio"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/tokens"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-credentials"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/secrets/workload-spiffe-uds"}: .: {} f:mountPath: {} f:name: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: .: {} f:sysctls: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"credential-socket"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-data"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"istio-envoy"}: .: {} f:emptyDir: .: {} f:medium: {} f:name: {} k:{"name":"istio-podinfo"}: .: {} f:downwardAPI: .: {} f:defaultMode: {} f:items: {} f:name: {} k:{"name":"istio-token"}: .: {} f:name: {} f:projected: .: {} f:defaultMode: {} f:sources: {} k:{"name":"istiod-ca-cert"}: .: {} f:configMap: .: {} f:defaultMode: {} f:name: {} f:name: {} k:{"name":"workload-certs"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"workload-socket"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:52:11Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:52:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.132.0.48"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:52:32Z" name: router-gateway-2-openshift-default-78c98f6f4c-ddrqp namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-gateway-2-openshift-default-78c98f6f4c uid: 8ea3db84-7947-45ae-8a53-0cf00386234c resourceVersion: "60448" uid: f371b3cf-555f-408b-83c0-9350d5cc73f7 spec: containers: - args: - proxy - router - --domain - $(POD_NAMESPACE).svc.cluster.local - --proxyLogLevel - warning - --proxyComponentLogLevel - misc:error - --log_output_level - default:info env: - name: PILOT_CERT_PROVIDER value: istiod - name: CA_ADDR value: istiod-openshift-gateway.openshift-ingress.svc:15012 - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP - name: SERVICE_ACCOUNT valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.serviceAccountName - name: HOST_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.hostIP - name: ISTIO_CPU_LIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: PROXY_CONFIG value: | {"discoveryAddress":"istiod-openshift-gateway.openshift-ingress.svc:15012","proxyHeaders":{"server":{"disabled":true},"envoyDebugHeaders":{"disabled":true},"metadataExchangeHeaders":{"mode":"IN_MESH"}}} - name: ISTIO_META_POD_PORTS value: '[]' - name: ISTIO_META_APP_CONTAINERS - name: GOMEMLIMIT valueFrom: resourceFieldRef: divisor: "0" resource: limits.memory - name: GOMAXPROCS valueFrom: resourceFieldRef: divisor: "0" resource: limits.cpu - name: ISTIO_META_CLUSTER_ID value: Kubernetes - name: ISTIO_META_NODE_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: spec.nodeName - name: ISTIO_META_INTERCEPTION_MODE value: REDIRECT - name: ISTIO_META_WORKLOAD_NAME value: router-gateway-2-openshift-default - name: ISTIO_META_OWNER value: kubernetes://apis/apps/v1/namespaces/kserve-ci-e2e-test/deployments/router-gateway-2-openshift-default - name: ISTIO_META_MESH_ID value: cluster.local - name: TRUST_DOMAIN value: cluster.local image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371 imagePullPolicy: IfNotPresent name: istio-proxy ports: - containerPort: 15020 name: metrics protocol: TCP - containerPort: 15021 name: status-port protocol: TCP - containerPort: 15090 name: http-envoy-prom protocol: TCP readinessProbe: failureThreshold: 4 httpGet: path: /healthz/ready port: 15021 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL privileged: false readOnlyRootFilesystem: true runAsGroup: 1000709999 runAsNonRoot: true runAsUser: 1000709999 startupProbe: failureThreshold: 30 httpGet: path: /healthz/ready port: 15021 scheme: HTTP initialDelaySeconds: 1 periodSeconds: 1 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-f2wh2 readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: router-gateway-2-openshift-default-dockercfg-xg4xw nodeName: ip-10-0-128-243.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault sysctls: - name: net.ipv4.ip_unprivileged_port_start value: "0" serviceAccount: router-gateway-2-openshift-default serviceAccountName: router-gateway-2-openshift-default terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: workload-socket - emptyDir: {} name: credential-socket - emptyDir: {} name: workload-certs - emptyDir: medium: Memory name: istio-envoy - emptyDir: {} name: istio-data - downwardAPI: defaultMode: 420 items: - fieldRef: apiVersion: v1 fieldPath: metadata.labels path: labels - fieldRef: apiVersion: v1 fieldPath: metadata.annotations path: annotations name: istio-podinfo - name: istio-token projected: defaultMode: 420 sources: - serviceAccountToken: audience: istio-ca expirationSeconds: 43200 path: istio-token - configMap: defaultMode: 420 name: openshift-gw-ca-root-cert name: istiod-ca-cert - name: kube-api-access-f2wh2 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:15Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:11Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:31Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:31Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:11Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 100m memory: 128Mi containerID: cri-o://ce3685a199f04edd97c2b553570a005168e6c7c6f5550891a28eeb29aea205e1 image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371 imageID: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371 lastState: terminated: containerID: cri-o://9ae0dff3bf8b74e369c42955bafc86b92627d90687bde475fcfdae0b6bbd1a71 exitCode: 0 finishedAt: "2026-06-15T06:52:24Z" reason: Completed startedAt: "2026-06-15T06:52:14Z" name: istio-proxy ready: true resources: limits: cpu: "2" memory: 1Gi requests: cpu: 100m memory: 128Mi restartCount: 1 started: true state: running: startedAt: "2026-06-15T06:52:25Z" user: linux: gid: 1000709999 supplementalGroups: - 1000709999 - 1000700000 uid: 1000709999 volumeMounts: - mountPath: /var/run/secrets/workload-spiffe-uds name: workload-socket - mountPath: /var/run/secrets/credential-uds name: credential-socket - mountPath: /var/run/secrets/workload-spiffe-credentials name: workload-certs - mountPath: /var/run/secrets/istio name: istiod-ca-cert - mountPath: /var/lib/istio/data name: istio-data - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /var/run/secrets/tokens name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-f2wh2 readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.243 hostIPs: - ip: 10.0.128.243 observedGeneration: 1 phase: Running podIP: 10.132.0.48 podIPs: - ip: 10.132.0.48 qosClass: Burstable startTime: "2026-06-15T06:52:11Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.48/23"],"mac_address":"0a:58:0a:86:00:30","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.48/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.48" ], "mac": "0a:58:0a:86:00:30", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:52:26Z" generateName: router-with-refs-pd-test-kserve-6f78896447- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: router-with-refs-pd-test app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: decode pod-template-hash: 6f78896447 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:52:26Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"3de390c7-64f1-433b-af14-e1d9f47e1412"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8001,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"llm-d-routing-sidecar"}: .: {} f:command: {} f:env: .: {} k:{"name":"INFERENCE_POOL_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"INFERENCE_POOL_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: {} f:restartPolicy: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:52:26Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:52:26Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.48"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:54:36Z" name: router-with-refs-pd-test-kserve-6f78896447-wshh4 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-pd-test-kserve-6f78896447 uid: 3de390c7-64f1-433b-af14-e1d9f47e1412 resourceVersion: "62193" uid: 5d2cc4b0-4097-4d57-9116-4a62c41cc76d spec: containers: - command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011). # Older versions still need the blanket --disable-uvicorn-access-log. ACCESS_LOG_ARGS="--disable-uvicorn-access-log" VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}') echo "[access-log-detect] vllm version='${VLLM_VERSION}'" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping" fi echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'" # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666). SHUTDOWN_TIMEOUT_ARGS="" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40" fi eval "exec vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \ --port 8001 \ ${ACCESS_LOG_ARGS} \ ${SHUTDOWN_TIMEOUT_ARGS} \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: VLLM_ENABLE_V1_MULTIPROCESSING value: "0" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 8 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 180 periodSeconds: 30 successThreshold: 1 timeoutSeconds: 30 name: main ports: - containerPort: 8001 protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8001 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-s2zn9 readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: router-with-refs-pd-test-kserve-dockercfg-nt6r8 initContainers: - command: - /app/pd-sidecar - --port=8000 - --vllm-port=8001 - --kv-connector=nixlv2 - --enable-ssrf-protection=true - --pool-group=inference.networking.x-k8s.io - --secure-proxy=true - --cert-path=/var/run/kserve/tls - --decoder-use-tls=true - --prefiller-use-tls=true env: - name: INFERENCE_POOL_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs - name: INFERENCE_POOL_NAME value: router-with-refs-pd-test-inference-pool image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: llm-d-routing-sidecar ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 10 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: {} restartPolicy: Always securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-s2zn9 readOnly: true - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-s2zn9 readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: router-with-refs-pd-test-kserve serviceAccountName: router-with-refs-pd-test-kserve terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: {} name: tmp-dir - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-pd-test-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-s2zn9 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:26Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:31Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:54:36Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:54:36Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:26Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://d68822c89ab4c22d8c6af65fd68d08df9f1c852252b8d75890f4192af6f26a69 image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:afb39fca138b51d019d986229d546531b45a2a3deb73bcf59bd42406e13fbba0 lastState: {} name: main ready: true resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:52:32Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-s2zn9 readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - containerID: cri-o://2c5583e9d57e610617e4afe818b762c966b02171e8da1bb9cb31b6b6c39db1bc image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1 imageID: ghcr.io/llm-d/llm-d-routing-sidecar@sha256:14ff2530c83bd6f95fa5b25309b150623b403da83f9152f635858f02163e2f95 lastState: {} name: llm-d-routing-sidecar ready: true resources: {} restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:52:26Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-s2zn9 readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://05b64457d70b2f1f5aced8df47a0ac28d696157ad85c665ae991a14efd8d41c3 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://05b64457d70b2f1f5aced8df47a0ac28d696157ad85c665ae991a14efd8d41c3 exitCode: 0 finishedAt: "2026-06-15T06:52:31Z" reason: Completed startedAt: "2026-06-15T06:52:26Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-s2zn9 readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.134.0.48 podIPs: - ip: 10.134.0.48 qosClass: Burstable startTime: "2026-06-15T06:52:26Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.49/23"],"mac_address":"0a:58:0a:86:00:31","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.49/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.49" ], "mac": "0a:58:0a:86:00:31", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:52:26Z" generateName: router-with-refs-pd-test-kserve-prefill-5fc8578dd5- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload-prefill app.kubernetes.io/name: router-with-refs-pd-test app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: prefill pod-template-hash: 5fc8578dd5 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:52:26Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"b26d0305-15ed-46d8-987e-75acf4c15489"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:52:26Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:52:26Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.49"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:55:16Z" name: router-with-refs-pd-test-kserve-prefill-5fc8578dd5-d6lhp namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-pd-test-kserve-prefill-5fc8578dd5 uid: b26d0305-15ed-46d8-987e-75acf4c15489 resourceVersion: "63041" uid: 9c26092a-3ad8-4fc2-879a-4b96a47c9166 spec: containers: - command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011). # Older versions still need the blanket --disable-uvicorn-access-log. ACCESS_LOG_ARGS="--disable-uvicorn-access-log" VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}') echo "[access-log-detect] vllm version='${VLLM_VERSION}'" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping" fi echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'" # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666). SHUTDOWN_TIMEOUT_ARGS="" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40" fi eval "exec vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" \ --port 8000 \ ${ACCESS_LOG_ARGS} \ ${SHUTDOWN_TIMEOUT_ARGS} \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: VLLM_ENABLE_V1_MULTIPROCESSING value: "0" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 8 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 180 periodSeconds: 30 successThreshold: 1 timeoutSeconds: 30 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kfcvt readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kfcvt readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: {} name: tmp-dir - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-pd-test-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-kfcvt projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:26Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:53:18Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:55:16Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:55:16Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:26Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://fe63a26a8ca996b9f2ad0eb8a01417e5eee30c0abbe5167f727442ff8d283ce7 image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:afb39fca138b51d019d986229d546531b45a2a3deb73bcf59bd42406e13fbba0 lastState: {} name: main ready: true resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:53:18Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kfcvt readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://b8415f513b0225674e4ba57b4ebab3ffddda0b02f5a7f22ff52ec88b94a4e1ef image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://b8415f513b0225674e4ba57b4ebab3ffddda0b02f5a7f22ff52ec88b94a4e1ef exitCode: 0 finishedAt: "2026-06-15T06:53:18Z" reason: Completed startedAt: "2026-06-15T06:52:26Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kfcvt readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.134.0.49 podIPs: - ip: 10.134.0.49 qosClass: Burstable startTime: "2026-06-15T06:52:26Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.133.0.45/23"],"mac_address":"0a:58:0a:85:00:2d","gateway_ips":["10.133.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.133.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.133.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.133.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.133.0.1"}],"ip_address":"10.133.0.45/23","gateway_ip":"10.133.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.133.0.45" ], "mac": "0a:58:0a:85:00:2d", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:52:28Z" generateName: router-with-refs-pd-test-kserve-router-scheduler-5f7487fdfb- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: router-with-refs-pd-test app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 5f7487fdfb managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-141-25 operation: Update subresource: status time: "2026-06-15T06:52:28Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"0c8740dd-1b7a-4125-9d09-4e1cc9c1ff89"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models/base"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} k:{"name":"STORAGE_ALLOW_PATTERNS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:52:28Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:52:28Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.133.0.45"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:53:00Z" name: router-with-refs-pd-test-kserve-router-scheduler-5f7487fdfmr99b namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-pd-test-kserve-router-scheduler-5f7487fdfb uid: 0c8740dd-1b7a-4125-9d09-4e1cc9c1ff89 resourceVersion: "60854" uid: bb6c90a3-f29e-457c-b4ff-2d9c233c7b18 spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: disagg-headers-handler - type: prefill-filter - type: decode-filter - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker - type: always-disagg-pd-decider - parameters: deciders: prefill: always-disagg-pd-decider type: disagg-profile-handler schedulingProfiles: - name: prefill plugins: - pluginRef: prefill-filter - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker - name: decode plugins: - pluginRef: decode-filter - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - router-with-refs-pd-test-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t9kqp readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t9kqp readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: router-with-refs-pd-test-epp-sa-dockercfg-zqkmt initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: STORAGE_ALLOW_PATTERNS value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json", "vocab.json", "merges.txt", "config.json", "generation_config.json"]' image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t9kqp readOnly: true nodeName: ip-10-0-141-25.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: router-with-refs-pd-test-epp-sa serviceAccountName: router-with-refs-pd-test-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-pd-test-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - emptyDir: {} name: kserve-provision-location - name: kube-api-access-t9kqp projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:29Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:30Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:53:00Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:53:00Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:52:28Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://416e6aa380be17d437d8fbc3bd0d12992e4125801d118845cc8706877afb3eb9 image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:52:30Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t9kqp readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://f4ef13056d3d95581c0612908e5ac35deeb001f9dec7e33f6c7404acd1e899f5 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:52:30Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t9kqp readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.141.25 hostIPs: - ip: 10.0.141.25 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://b0165dd19df93ec101af23e0273c86d22b0d98fc86065345f99a625f3d9da714 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://b0165dd19df93ec101af23e0273c86d22b0d98fc86065345f99a625f3d9da714 exitCode: 0 finishedAt: "2026-06-15T06:52:29Z" reason: Completed startedAt: "2026-06-15T06:52:28Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-t9kqp readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.133.0.45 podIPs: - ip: 10.133.0.45 qosClass: Burstable startTime: "2026-06-15T06:52:28Z" - apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.41/23"],"mac_address":"0a:58:0a:86:00:29","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.41/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.41" ], "mac": "0a:58:0a:86:00:29", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:30:12Z" generateName: router-with-refs-test-kserve-578d595fc- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-workload app.kubernetes.io/name: router-with-refs-test app.kubernetes.io/part-of: llminferenceservice kserve.io/component: workload llm-d.ai/role: both pod-template-hash: 578d595fc managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:30:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:kserve.io/component: {} f:llm-d.ai/role: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"0d603bab-21f2-4f77-87e7-1041dbaae626"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:command: {} f:env: .: {} k:{"name":"HF_HUB_CACHE"}: .: {} f:name: {} f:value: {} k:{"name":"HOME"}: .: {} f:name: {} f:value: {} k:{"name":"TORCHINDUCTOR_CACHE_DIR"}: .: {} f:name: {} f:value: {} k:{"name":"USER"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_CPU_KVCACHE_SPACE"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}: .: {} f:name: {} f:value: {} k:{"name":"VLLM_LOGGING_LEVEL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:lifecycle: .: {} f:preStop: .: {} f:exec: .: {} f:command: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8000,"protocol":"TCP"}: .: {} f:containerPort: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/dev/shm"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/home"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/models"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"dshm"}: .: {} f:emptyDir: .: {} f:medium: {} f:sizeLimit: {} f:name: {} k:{"name":"home"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"model-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tmp-dir"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:30:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:30:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.41"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:32:22Z" name: router-with-refs-test-kserve-578d595fc-gtvkx namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-test-kserve-578d595fc uid: 0d603bab-21f2-4f77-87e7-1041dbaae626 resourceVersion: "45742" uid: c15f33fe-f482-409c-8011-41250b69694d spec: containers: - command: - /bin/bash - -c - |- if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then source /etc/profile.d/ibm-aiu-setup.sh fi if [ "$KSERVE_INFER_ROCE" = "true" ]; then echo "Trying to infer RoCE configs ... " grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null cat /proc/driver/nvidia/params KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"} echo "[Infer RoCE] Discovering active HCAs ..." active_hcas=() # Loop through all mlx5 devices found in sysfs for hca_dir in /sys/class/infiniband/mlx5_*; do # Ensure it's a directory before proceeding if [ -d "$hca_dir" ]; then hca_name=$(basename "$hca_dir") port_state_file="$hca_dir/ports/1/state" # Assume port 1 type_file="$hca_dir/ports/1/gid_attrs/types/*" echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'" if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then echo "[Infer RoCE] Found active HCA: $hca_name" active_hcas+=("$hca_name") else echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name" fi fi done ucx_hcas=() for hca in "${active_hcas[@]}"; do ucx_hcas+=("${hca}:1") done # Check if we found any active HCAs if [ ${#active_hcas[@]} -gt 0 ]; then # Join the array elements with a comma hcas=$(IFS=,; echo "${active_hcas[*]}") echo "[Infer RoCE] Setting active HCAs: ${hcas}" export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}} export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}} export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}} echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}" echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}" else echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set." fi if [ ${#active_hcas[@]} -gt 0 ]; then echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..." # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs declare -A gid_index_count declare -A hca_gid_index for hca_name in "${active_hcas[@]}"; do echo "[Infer RoCE] Processing HCA: ${hca_name}" # Find all RoCE v2 IPv4 GIDs for this HCA and count by index for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then idx=$(basename "$tpath") gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}" # Check for IPv4 GID (contains ffff:) if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then gid_value=$(cat "$gid_file" 2>/dev/null || echo "") echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}" hca_gid_index["${hca_name}"]="${idx}" gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1)) break # Use first found IPv4 GID per HCA fi fi done done # Find the most common GID index (most likely to be consistent across nodes) best_gid_index="" max_count=0 for idx in "${!gid_index_count[@]}"; do count=${gid_index_count["${idx}"]} echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs" if [ $count -gt $max_count ]; then max_count=$count best_gid_index="$idx" fi done # Use deterministic fallback if counts are equal - prefer lower index number if [ ${#gid_index_count[@]} -gt 1 ]; then echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}" # If there's a tie, prefer index 3 as it's most common in SR-IOV setups if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then best_gid_index="3" echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)" fi fi # Check if GID_INDEX is already set via environment variables if [ -n "${NCCL_IB_GID_INDEX}" ]; then echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment" export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX} echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX" elif [ -n "$best_gid_index" ]; then echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)" export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index} export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index} export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index} echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX" else echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA." fi else echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference." fi fi # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011). # Older versions still need the blanket --disable-uvicorn-access-log. ACCESS_LOG_ARGS="--disable-uvicorn-access-log" VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}') echo "[access-log-detect] vllm version='${VLLM_VERSION}'" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping" fi echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'" # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666). SHUTDOWN_TIMEOUT_ARGS="" if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40" fi eval "exec vllm serve /mnt/models \ --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \ --port 8000 \ ${ACCESS_LOG_ARGS} \ ${SHUTDOWN_TIMEOUT_ARGS} \ --enable-ssl-refresh \ --ssl-certfile /var/run/kserve/tls/tls.crt \ --ssl-keyfile /var/run/kserve/tls/tls.key \ ${VLLM_ADDITIONAL_ARGS} \ $@" - -- env: - name: HOME value: /home - name: VLLM_LOGGING_LEVEL value: DEBUG - name: VLLM_CPU_KVCACHE_SPACE value: "1" - name: VLLM_ENABLE_V1_MULTIPROCESSING value: "0" - name: USER value: nonroot - name: TORCHINDUCTOR_CACHE_DIR value: /tmp/torchinductor-cache - name: HF_HUB_CACHE value: /models image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imagePullPolicy: IfNotPresent lifecycle: preStop: exec: command: - /bin/sleep - "15" livenessProbe: failureThreshold: 3 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 10 name: main ports: - containerPort: 8000 protocol: TCP readinessProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /health port: 8000 scheme: HTTPS periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /mnt/models name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-tdqbj readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-tdqbj readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 60 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - emptyDir: {} name: home - emptyDir: medium: Memory sizeLimit: 1Gi name: dshm - emptyDir: {} name: model-cache - emptyDir: {} name: tmp-dir - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-test-kserve-self-signed-certs - emptyDir: {} name: kserve-provision-location - name: kube-api-access-tdqbj projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:12Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:24Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:32:22Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:32:22Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:12Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 200m memory: 2Gi containerID: cri-o://8c7c363c0368b1dd791993f3898677a646e12c2af435e2e198adb3ba9c1f22db image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0 imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:afb39fca138b51d019d986229d546531b45a2a3deb73bcf59bd42406e13fbba0 lastState: {} name: main ready: true resources: limits: cpu: "2" memory: 7Gi requests: cpu: 200m memory: 2Gi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:30:24Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /home name: home - mountPath: /tmp name: tmp-dir - mountPath: /dev/shm name: dshm - mountPath: /models name: model-cache - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /mnt/models name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-tdqbj readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://dbadf93e026882e53c6bdf1347713790357cd44393bea4f60a643d328ceaf7e4 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://dbadf93e026882e53c6bdf1347713790357cd44393bea4f60a643d328ceaf7e4 exitCode: 0 finishedAt: "2026-06-15T06:30:23Z" reason: Completed startedAt: "2026-06-15T06:30:12Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-tdqbj readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.134.0.41 podIPs: - ip: 10.134.0.41 qosClass: Burstable startTime: "2026-06-15T06:30:12Z" - apiVersion: v1 kind: Pod metadata: annotations: app.kubernetes.io/version: 0.7.0 certificates.kserve.io/expiration-v2: "true" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.42/23"],"mac_address":"0a:58:0a:86:00:2a","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.42/23","gateway_ip":"10.134.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.134.0.42" ], "mac": "0a:58:0a:86:00:2a", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-06-15T06:30:12Z" generateName: router-with-refs-test-kserve-router-scheduler-7d4868d689- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: router-with-refs-test app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 7d4868d689 managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-128-226 operation: Update subresource: status time: "2026-06-15T06:30:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: {} f:app.kubernetes.io/version: {} f:certificates.kserve.io/expiration-v2: {} f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"dfa69409-5eec-4453-854d-0e1a9b183345"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models/base"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"AWS_ACCESS_KEY_ID"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"AWS_CA_BUNDLE"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_ENDPOINT_URL"}: .: {} f:name: {} f:value: {} k:{"name":"AWS_SECRET_ACCESS_KEY"}: .: {} f:name: {} f:valueFrom: .: {} f:secretKeyRef: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_ENDPOINT"}: .: {} f:name: {} f:value: {} k:{"name":"S3_USE_HTTPS"}: .: {} f:name: {} f:value: {} k:{"name":"S3_VERIFY_SSL"}: .: {} f:name: {} f:value: {} k:{"name":"STORAGE_ALLOW_PATTERNS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-06-15T06:30:12Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-06-15T06:30:13Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} k:{"type":"PodScheduled"}: f:observedGeneration: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:observedGeneration: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:observedGeneration: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.134.0.42"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-06-15T06:30:54Z" name: router-with-refs-test-kserve-router-scheduler-7d4868d689-h4c76 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: router-with-refs-test-kserve-router-scheduler-7d4868d689 uid: dfa69409-5eec-4453-854d-0e1a9b183345 resourceVersion: "44794" uid: c837db67-d0c8-447e-87c9-5dec6015f51f spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - router-with-refs-test-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-4rv7r readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000700000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-4rv7r readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-fjfwp - name: router-with-refs-test-epp-sa-dockercfg-5mggr initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: AWS_ACCESS_KEY_ID name: seaweedfs-s3-creds - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: AWS_SECRET_ACCESS_KEY name: seaweedfs-s3-creds - name: S3_USE_HTTPS value: "0" - name: S3_ENDPOINT value: s3-service.kserve:8333 - name: AWS_ENDPOINT_URL value: http://s3-service.kserve:8333 - name: S3_VERIFY_SSL value: "0" - name: AWS_CA_BUNDLE value: /etc/ssl/custom-certs/cabundle.crt - name: AWS_CA_BUNDLE_CONFIGMAP value: odh-kserve-custom-ca-bundle - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: STORAGE_ALLOW_PATTERNS value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json", "vocab.json", "merges.txt", "config.json", "generation_config.json"]' image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000700000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-4rv7r readOnly: true nodeName: ip-10-0-128-226.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000700000 seLinuxOptions: level: s0:c26,c25 seccompProfile: type: RuntimeDefault serviceAccount: router-with-refs-test-epp-sa serviceAccountName: router-with-refs-test-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: router-with-refs-test-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - emptyDir: {} name: kserve-provision-location - name: kube-api-access-4rv7r projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:13Z" observedGeneration: 1 status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:14Z" observedGeneration: 1 status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:54Z" observedGeneration: 1 status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:54Z" observedGeneration: 1 status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-06-15T06:30:12Z" observedGeneration: 1 status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://803e4a2f17d5f799d87d17f68a1a177346f5ded01e24303c753de50ba88947ce image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:30:15Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-4rv7r readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://f44c996a236d3e1910dee3fb14563f4bc2dc932284021163b6def597a43b3614 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-06-15T06:30:15Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-4rv7r readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.128.226 hostIPs: - ip: 10.0.128.226 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://baffc1708f3a7efef0c0b049d34c2ca7f8c6aa71d01be02675cd452ffb11b1c6 image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:002b0d8b8a0a27ede61dd8a8fe85971fe09fa0abcbb90ad99f092e41c4fb46a7 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://baffc1708f3a7efef0c0b049d34c2ca7f8c6aa71d01be02675cd452ffb11b1c6 exitCode: 0 finishedAt: "2026-06-15T06:30:13Z" reason: Completed startedAt: "2026-06-15T06:30:13Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000700000 uid: 1000700000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-4rv7r readOnly: true recursiveReadOnly: Disabled observedGeneration: 1 phase: Running podIP: 10.134.0.42 podIPs: - ip: 10.134.0.42 qosClass: Burstable startTime: "2026-06-15T06:30:12Z" kind: PodList metadata: resourceVersion: "97581"