--- apiVersion: v1 kind: Pod metadata: annotations: k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.132.0.39/23"],"mac_address":"0a:58:0a:84:00:27","gateway_ips":["10.132.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.132.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.132.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.132.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.132.0.1"}],"ip_address":"10.132.0.39/23","gateway_ip":"10.132.0.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.132.0.39" ], "mac": "0a:58:0a:84:00:27", "default": true, "dns": {} }] openshift.io/scc: restricted-v2 seccomp.security.alpha.kubernetes.io/pod: runtime/default security.openshift.io/validated-scc-subject-type: user creationTimestamp: "2026-04-24T17:13:34Z" generateName: stop-feature-test-kserve-router-scheduler-6c85574b4f- generation: 1 labels: app.kubernetes.io/component: llminferenceservice-router-scheduler app.kubernetes.io/name: stop-feature-test app.kubernetes.io/part-of: llminferenceservice pod-template-hash: 6c85574b4f managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.ovn.org/pod-networks: {} manager: ip-10-0-137-69 operation: Update subresource: status time: "2026-04-24T17:13:34Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:generateName: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/part-of: {} f:pod-template-hash: {} f:ownerReferences: .: {} k:{"uid":"998579d2-3426-4020-a0c1-7956f2af0e04"}: {} f:spec: f:containers: k:{"name":"main"}: .: {} f:args: {} f:command: {} f:env: .: {} k:{"name":"SSL_CERT_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":5557,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9002,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9003,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:grpc: .: {} f:port: {} f:service: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/var/run/kserve/tls"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"name":"tokenizer"}: .: {} f:env: .: {} k:{"name":"TOKENIZERS_DIR"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8082,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:seccompProfile: .: {} f:type: {} f:startupProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/.cache"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/mnt/models/base"}: .: {} f:mountPath: {} f:name: {} f:readOnly: {} k:{"mountPath":"/tmp"}: .: {} f:mountPath: {} f:name: {} k:{"mountPath":"/tmp/tokenizer"}: .: {} f:mountPath: {} f:name: {} f:workingDir: {} f:dnsPolicy: {} f:enableServiceLinks: {} f:initContainers: .: {} k:{"name":"storage-initializer"}: .: {} f:args: {} f:env: .: {} k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_HIGH_PERFORMANCE"}: .: {} f:name: {} f:value: {} k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}: .: {} f:name: {} f:value: {} k:{"name":"STORAGE_ALLOW_PATTERNS"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/mnt/models"}: .: {} f:mountPath: {} f:name: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"kserve-provision-location"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tls-certs"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} k:{"name":"tokenizer-cache"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-tmp"}: .: {} f:emptyDir: {} f:name: {} k:{"name":"tokenizer-uds"}: .: {} f:emptyDir: {} f:name: {} manager: kube-controller-manager operation: Update time: "2026-04-24T17:13:34Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:k8s.v1.cni.cncf.io/network-status: {} manager: multus-daemon operation: Update subresource: status time: "2026-04-24T17:13:35Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:status: f:conditions: k:{"type":"ContainersReady"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Initialized"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"PodReadyToStartContainers"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} k:{"type":"Ready"}: .: {} f:lastProbeTime: {} f:lastTransitionTime: {} f:status: {} f:type: {} f:containerStatuses: {} f:hostIP: {} f:hostIPs: {} f:initContainerStatuses: {} f:phase: {} f:podIP: {} f:podIPs: .: {} k:{"ip":"10.132.0.39"}: .: {} f:ip: {} f:startTime: {} manager: kubelet operation: Update subresource: status time: "2026-04-24T17:14:06Z" name: stop-feature-test-kserve-router-scheduler-6c85574b4f-hkzx8 namespace: kserve-ci-e2e-test ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: stop-feature-test-kserve-router-scheduler-6c85574b4f uid: 998579d2-3426-4020-a0c1-7956f2af0e04 resourceVersion: "39686" uid: a31a70b8-65d8-418e-aa56-02717c6e7e73 spec: containers: - args: - --config-text - | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: single-profile-handler - type: queue-scorer - type: prefix-cache-scorer - type: max-score-picker schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer weight: 2 - pluginRef: prefix-cache-scorer weight: 3 - pluginRef: max-score-picker command: - /app/epp - --pool-name - stop-feature-test-inference-pool - --pool-namespace - kserve-ci-e2e-test - --zap-encoder - json - --grpc-port - "9002" - --grpc-health-port - "9003" - --enable-cert-reload=true - --secure-serving=true - --model-server-metrics-scheme=https - --cert-path=/var/run/kserve/tls env: - name: SSL_CERT_DIR value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 grpc: port: 9003 service: liveness initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 name: main ports: - containerPort: 9002 name: grpc protocol: TCP - containerPort: 9003 name: grpc-health protocol: TCP - containerPort: 9090 name: metrics protocol: TCP - containerPort: 5557 name: zmq protocol: TCP readinessProbe: failureThreshold: 3 grpc: port: 9003 service: readiness initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000690000 seccompProfile: type: RuntimeDefault terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-wqvd7 readOnly: true - env: - name: TOKENIZERS_DIR value: /mnt/models image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 15 successThreshold: 1 timeoutSeconds: 5 name: tokenizer ports: - containerPort: 8082 name: health protocol: TCP readinessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8082 scheme: HTTP periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 256m memory: 500Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL readOnlyRootFilesystem: true runAsNonRoot: true runAsUser: 1000690000 seccompProfile: type: RuntimeDefault startupProbe: failureThreshold: 60 httpGet: path: /healthz port: 8082 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-wqvd7 readOnly: true workingDir: /mnt/models dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-q89jh - name: stop-feature-test-epp-sa-dockercfg-hqqcv initContainers: - args: - hf://facebook/opt-125m - /mnt/models env: - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - name: HF_XET_HIGH_PERFORMANCE value: "1" - name: HF_XET_NUM_CONCURRENT_RANGE_GETS value: "8" - name: STORAGE_ALLOW_PATTERNS value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json", "vocab.json", "merges.txt", "config.json", "generation_config.json"]' image: quay.io/opendatahub/kserve-storage-initializer@sha256:e180c5801eecc9f4bbb52efa5a04a7e91697942e5c91b352ed2867858a22963a imagePullPolicy: IfNotPresent name: storage-initializer resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsNonRoot: true runAsUser: 1000690000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-wqvd7 readOnly: true nodeName: ip-10-0-137-69.ec2.internal preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000690000 seLinuxOptions: level: s0:c26,c20 seccompProfile: type: RuntimeDefault serviceAccount: stop-feature-test-epp-sa serviceAccountName: stop-feature-test-epp-sa terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: tls-certs secret: defaultMode: 420 secretName: stop-feature-test-kserve-self-signed-certs - emptyDir: {} name: tokenizer-uds - emptyDir: {} name: tokenizer-tmp - emptyDir: {} name: tokenizer-cache - emptyDir: {} name: kserve-provision-location - name: kube-api-access-wqvd7 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2026-04-24T17:13:35Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2026-04-24T17:13:36Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2026-04-24T17:14:06Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2026-04-24T17:14:06Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2026-04-24T17:13:34Z" status: "True" type: PodScheduled containerStatuses: - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://78788a144f0e419cf41c1b614f9323139e0b498578f82daed7c94cfa64e3dd0d image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1 imageID: ghcr.io/llm-d/llm-d-inference-scheduler@sha256:88de279c6eb6758a4c600de9730e49e46b04c392846afedd03d82447379c9e7a lastState: {} name: main ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-04-24T17:13:36Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /var/run/kserve/tls name: tls-certs readOnly: true recursiveReadOnly: Disabled - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-wqvd7 readOnly: true recursiveReadOnly: Disabled - allocatedResources: cpu: 256m memory: 500Mi containerID: cri-o://376d9725f52d70ae6d122c0e798091b6bb876f1d9da95fa7e22c8c912417b6f5 image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1 imageID: ghcr.io/llm-d/llm-d-uds-tokenizer@sha256:aed091a51f3d64458f1fdb451d21f745186bb4517a7ba0c49913a0c617366a3e lastState: {} name: tokenizer ready: true resources: requests: cpu: 256m memory: 500Mi restartCount: 0 started: true state: running: startedAt: "2026-04-24T17:13:36Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /tmp name: tokenizer-tmp - mountPath: /.cache name: tokenizer-cache - mountPath: /tmp/tokenizer name: tokenizer-uds - mountPath: /mnt/models/base name: kserve-provision-location readOnly: true recursiveReadOnly: Disabled - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-wqvd7 readOnly: true recursiveReadOnly: Disabled hostIP: 10.0.137.69 hostIPs: - ip: 10.0.137.69 initContainerStatuses: - allocatedResources: cpu: 100m memory: 100Mi containerID: cri-o://92af2fde0f62e107fc682a12f873b950bfee813bb27dfbc16a6a4df4022e9ab4 image: quay.io/opendatahub/kserve-storage-initializer@sha256:e180c5801eecc9f4bbb52efa5a04a7e91697942e5c91b352ed2867858a22963a imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:beb2d86857b6be056be5b9b8be9f2d0cae6bec5194465a287ac65bbb72e60fd0 lastState: {} name: storage-initializer ready: true resources: limits: cpu: "1" memory: 24Gi requests: cpu: 100m memory: 100Mi restartCount: 0 started: false state: terminated: containerID: cri-o://92af2fde0f62e107fc682a12f873b950bfee813bb27dfbc16a6a4df4022e9ab4 exitCode: 0 finishedAt: "2026-04-24T17:13:35Z" reason: Completed startedAt: "2026-04-24T17:13:35Z" user: linux: gid: 0 supplementalGroups: - 0 - 1000690000 uid: 1000690000 volumeMounts: - mountPath: /mnt/models name: kserve-provision-location - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-wqvd7 readOnly: true recursiveReadOnly: Disabled phase: Running podIP: 10.132.0.39 podIPs: - ip: 10.132.0.39 qosClass: Burstable startTime: "2026-04-24T17:13:34Z"