---
# Pod dump (kubectl get -o yaml style) for a kserve LLMInferenceService decode
# workload: vLLM main container fronted by the llm-d routing sidecar, with a
# storage-initializer init container that pre-pulls the model.
# NOTE(review): this file had its newlines collapsed and was not parseable
# YAML; structure below is reconstructed with all keys/values preserved.
apiVersion: v1
kind: Pod
metadata:
  annotations:
    k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.134.0.60/23"],"mac_address":"0a:58:0a:86:00:3c","gateway_ips":["10.134.0.1"],"routes":[{"dest":"10.132.0.0/14","nextHop":"10.134.0.1"},{"dest":"172.31.0.0/16","nextHop":"10.134.0.1"},{"dest":"169.254.0.5/32","nextHop":"10.134.0.1"},{"dest":"100.64.0.0/16","nextHop":"10.134.0.1"}],"ip_address":"10.134.0.60/23","gateway_ip":"10.134.0.1","role":"primary"}}'
    k8s.v1.cni.cncf.io/network-status: |-
      [{
          "name": "ovn-kubernetes",
          "interface": "eth0",
          "ips": [
              "10.134.0.60"
          ],
          "mac": "0a:58:0a:86:00:3c",
          "default": true,
          "dns": {}
      }]
    openshift.io/scc: restricted-v2
    seccomp.security.alpha.kubernetes.io/pod: runtime/default
    security.openshift.io/validated-scc-subject-type: user
  creationTimestamp: "2026-04-16T14:26:27Z"
  deletionGracePeriodSeconds: 30
  deletionTimestamp: "2026-04-16T14:29:50Z"
  generateName: router-with-refs-pd-test-kserve-559fccd7cb-
  generation: 2
  labels:
    app.kubernetes.io/component: llminferenceservice-workload
    app.kubernetes.io/name: router-with-refs-pd-test
    app.kubernetes.io/part-of: llminferenceservice
    kserve.io/component: workload
    llm-d.ai/role: decode
    pod-template-hash: 559fccd7cb
  managedFields:
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:metadata:
        f:annotations:
          f:k8s.ovn.org/pod-networks: {}
    manager: ip-10-0-129-3
    operation: Update
    subresource: status
    time: "2026-04-16T14:26:27Z"
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:metadata:
        f:generateName: {}
        f:labels:
          .: {}
          f:app.kubernetes.io/component: {}
          f:app.kubernetes.io/name: {}
          f:app.kubernetes.io/part-of: {}
          f:kserve.io/component: {}
          f:llm-d.ai/role: {}
          f:pod-template-hash: {}
        f:ownerReferences:
          .: {}
          k:{"uid":"c76706cd-8291-4c68-b17f-a98cdbb00181"}: {}
      f:spec:
        f:containers:
          k:{"name":"main"}:
            .: {}
            f:command: {}
            f:env:
              .: {}
              k:{"name":"HF_HUB_CACHE"}:
                .: {}
                f:name: {}
                f:value: {}
              k:{"name":"HOME"}:
                .: {}
                f:name: {}
                f:value: {}
              k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                .: {}
                f:name: {}
                f:value: {}
              k:{"name":"VLLM_LOGGING_LEVEL"}:
                .: {}
                f:name: {}
                f:value: {}
            f:image: {}
            f:imagePullPolicy: {}
            f:livenessProbe:
              .: {}
              f:failureThreshold: {}
              f:httpGet:
                .: {}
                f:path: {}
                f:port: {}
                f:scheme: {}
              f:initialDelaySeconds: {}
              f:periodSeconds: {}
              f:successThreshold: {}
              f:timeoutSeconds: {}
            f:name: {}
            f:ports:
              .: {}
              k:{"containerPort":8001,"protocol":"TCP"}:
                .: {}
                f:containerPort: {}
                f:protocol: {}
            f:readinessProbe:
              .: {}
              f:failureThreshold: {}
              f:httpGet:
                .: {}
                f:path: {}
                f:port: {}
                f:scheme: {}
              f:initialDelaySeconds: {}
              f:periodSeconds: {}
              f:successThreshold: {}
              f:timeoutSeconds: {}
            f:resources:
              .: {}
              f:limits:
                .: {}
                f:cpu: {}
                f:memory: {}
              f:requests:
                .: {}
                f:cpu: {}
                f:memory: {}
            f:securityContext:
              .: {}
              f:allowPrivilegeEscalation: {}
              f:capabilities:
                .: {}
                f:drop: {}
              f:readOnlyRootFilesystem: {}
              f:runAsNonRoot: {}
              f:seccompProfile:
                .: {}
                f:type: {}
            f:startupProbe:
              .: {}
              f:failureThreshold: {}
              f:httpGet:
                .: {}
                f:path: {}
                f:port: {}
                f:scheme: {}
              f:periodSeconds: {}
              f:successThreshold: {}
              f:timeoutSeconds: {}
            f:terminationMessagePath: {}
            f:terminationMessagePolicy: {}
            f:volumeMounts:
              .: {}
              k:{"mountPath":"/dev/shm"}:
                .: {}
                f:mountPath: {}
                f:name: {}
              k:{"mountPath":"/home"}:
                .: {}
                f:mountPath: {}
                f:name: {}
              k:{"mountPath":"/mnt/models"}:
                .: {}
                f:mountPath: {}
                f:name: {}
                f:readOnly: {}
              k:{"mountPath":"/models"}:
                .: {}
                f:mountPath: {}
                f:name: {}
              k:{"mountPath":"/var/run/kserve/tls"}:
                .: {}
                f:mountPath: {}
                f:name: {}
                f:readOnly: {}
        f:dnsPolicy: {}
        f:enableServiceLinks: {}
        f:initContainers:
          .: {}
          k:{"name":"llm-d-routing-sidecar"}:
            .: {}
            f:command: {}
            f:env:
              .: {}
              k:{"name":"INFERENCE_POOL_NAME"}:
                .: {}
                f:name: {}
                f:value: {}
              k:{"name":"INFERENCE_POOL_NAMESPACE"}:
                .: {}
                f:name: {}
                f:valueFrom:
                  .: {}
                  f:fieldRef: {}
              k:{"name":"SSL_CERT_DIR"}:
                .: {}
                f:name: {}
                f:value: {}
            f:image: {}
            f:imagePullPolicy: {}
            f:livenessProbe:
              .: {}
              f:failureThreshold: {}
              f:httpGet:
                .: {}
                f:path: {}
                f:port: {}
                f:scheme: {}
              f:initialDelaySeconds: {}
              f:periodSeconds: {}
              f:successThreshold: {}
              f:timeoutSeconds: {}
            f:name: {}
            f:ports:
              .: {}
              k:{"containerPort":8000,"protocol":"TCP"}:
                .: {}
                f:containerPort: {}
                f:protocol: {}
            f:readinessProbe:
              .: {}
              f:failureThreshold: {}
              f:httpGet:
                .: {}
                f:path: {}
                f:port: {}
                f:scheme: {}
              f:initialDelaySeconds: {}
              f:periodSeconds: {}
              f:successThreshold: {}
              f:timeoutSeconds: {}
            f:resources: {}
            f:restartPolicy: {}
            f:securityContext:
              .: {}
              f:allowPrivilegeEscalation: {}
              f:capabilities:
                .: {}
                f:drop: {}
              f:readOnlyRootFilesystem: {}
              f:runAsNonRoot: {}
            f:terminationMessagePath: {}
            f:terminationMessagePolicy: {}
            f:volumeMounts:
              .: {}
              k:{"mountPath":"/var/run/kserve/tls"}:
                .: {}
                f:mountPath: {}
                f:name: {}
                f:readOnly: {}
          k:{"name":"storage-initializer"}:
            .: {}
            f:args: {}
            f:env:
              .: {}
              k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                .: {}
                f:name: {}
                f:value: {}
              k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                .: {}
                f:name: {}
                f:value: {}
              k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                .: {}
                f:name: {}
                f:value: {}
            f:image: {}
            f:imagePullPolicy: {}
            f:name: {}
            f:resources:
              .: {}
              f:limits:
                .: {}
                f:cpu: {}
                f:memory: {}
              f:requests:
                .: {}
                f:cpu: {}
                f:memory: {}
            f:terminationMessagePath: {}
            f:terminationMessagePolicy: {}
            f:volumeMounts:
              .: {}
              k:{"mountPath":"/mnt/models"}:
                .: {}
                f:mountPath: {}
                f:name: {}
        f:restartPolicy: {}
        f:schedulerName: {}
        f:securityContext: {}
        f:serviceAccount: {}
        f:serviceAccountName: {}
        f:terminationGracePeriodSeconds: {}
        f:volumes:
          .: {}
          k:{"name":"dshm"}:
            .: {}
            f:emptyDir:
              .: {}
              f:medium: {}
              f:sizeLimit: {}
            f:name: {}
          k:{"name":"home"}:
            .: {}
            f:emptyDir: {}
            f:name: {}
          k:{"name":"kserve-provision-location"}:
            .: {}
            f:emptyDir: {}
            f:name: {}
          k:{"name":"model-cache"}:
            .: {}
            f:emptyDir: {}
            f:name: {}
          k:{"name":"tls-certs"}:
            .: {}
            f:name: {}
            f:secret:
              .: {}
              f:defaultMode: {}
              f:secretName: {}
    manager: kube-controller-manager
    operation: Update
    time: "2026-04-16T14:26:27Z"
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:metadata:
        f:annotations:
          f:k8s.v1.cni.cncf.io/network-status: {}
    manager: multus-daemon
    operation: Update
    subresource: status
    time: "2026-04-16T14:26:27Z"
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:status:
        f:conditions:
          k:{"type":"ContainersReady"}:
            .: {}
            f:lastProbeTime: {}
            f:lastTransitionTime: {}
            f:status: {}
            f:type: {}
          k:{"type":"Initialized"}:
            .: {}
            f:lastProbeTime: {}
            f:lastTransitionTime: {}
            f:status: {}
            f:type: {}
          k:{"type":"PodReadyToStartContainers"}:
            .: {}
            f:lastProbeTime: {}
            f:lastTransitionTime: {}
            f:status: {}
            f:type: {}
          k:{"type":"Ready"}:
            .: {}
            f:lastProbeTime: {}
            f:lastTransitionTime: {}
            f:status: {}
            f:type: {}
        f:containerStatuses: {}
        f:hostIP: {}
        f:hostIPs: {}
        f:initContainerStatuses: {}
        f:phase: {}
        f:podIP: {}
        f:podIPs:
          .: {}
          k:{"ip":"10.134.0.60"}:
            .: {}
            f:ip: {}
        f:startTime: {}
    manager: kubelet
    operation: Update
    subresource: status
    time: "2026-04-16T14:29:07Z"
  name: router-with-refs-pd-test-kserve-559fccd7cb-kwfkq
  namespace: kserve-ci-e2e-test
  ownerReferences:
  - apiVersion: apps/v1
    blockOwnerDeletion: true
    controller: true
    kind: ReplicaSet
    name: router-with-refs-pd-test-kserve-559fccd7cb
    uid: c76706cd-8291-4c68-b17f-a98cdbb00181
  resourceVersion: "36855"
  uid: 9e845b36-1515-4741-8a77-993cd22eb17b
spec:
  containers:
  # Main vLLM server. Serves HTTPS on 8001; the routing sidecar (below)
  # listens on 8000 and proxies to it.
  - command:
    - /bin/bash
    - -c
    - |-
      if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
        source /etc/profile.d/ibm-aiu-setup.sh
      fi
      if [ "$KSERVE_INFER_ROCE" = "true" ]; then
        echo "Trying to infer RoCE configs ... "
        grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
        grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null
        cat /proc/driver/nvidia/params
        KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}
        echo "[Infer RoCE] Discovering active HCAs ..."
        active_hcas=()
        # Loop through all mlx5 devices found in sysfs
        for hca_dir in /sys/class/infiniband/mlx5_*; do
          # Ensure it's a directory before proceeding
          if [ -d "$hca_dir" ]; then
            hca_name=$(basename "$hca_dir")
            port_state_file="$hca_dir/ports/1/state" # Assume port 1
            type_file="$hca_dir/ports/1/gid_attrs/types/*"
            echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
            if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
              echo "[Infer RoCE] Found active HCA: $hca_name"
              active_hcas+=("$hca_name")
            else
              echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
            fi
          fi
        done
        ucx_hcas=()
        for hca in "${active_hcas[@]}"; do
          ucx_hcas+=("${hca}:1")
        done
        # Check if we found any active HCAs
        if [ ${#active_hcas[@]} -gt 0 ]; then
          # Join the array elements with a comma
          hcas=$(IFS=,; echo "${active_hcas[*]}")
          echo "[Infer RoCE] Setting active HCAs: ${hcas}"
          export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
          export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}
          export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}
          echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
          echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
        else
          echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
        fi
        if [ ${#active_hcas[@]} -gt 0 ]; then
          echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."
          # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
          declare -A gid_index_count
          declare -A hca_gid_index
          for hca_name in "${active_hcas[@]}"; do
            echo "[Infer RoCE] Processing HCA: ${hca_name}"
            # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
            for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
              if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                idx=$(basename "$tpath")
                gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                # Check for IPv4 GID (contains ffff:)
                if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                  hca_gid_index["${hca_name}"]="${idx}"
                  gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1))
                  break # Use first found IPv4 GID per HCA
                fi
              fi
            done
          done
          # Find the most common GID index (most likely to be consistent across nodes)
          best_gid_index=""
          max_count=0
          for idx in "${!gid_index_count[@]}"; do
            count=${gid_index_count["${idx}"]}
            echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
            if [ $count -gt $max_count ]; then
              max_count=$count
              best_gid_index="$idx"
            fi
          done
          # Use deterministic fallback if counts are equal - prefer lower index number
          if [ ${#gid_index_count[@]} -gt 1 ]; then
            echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
            # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
            if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
              best_gid_index="3"
              echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
            fi
          fi
          # Check if GID_INDEX is already set via environment variables
          if [ -n "${NCCL_IB_GID_INDEX}" ]; then
            echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
            export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
            export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
            echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX"
          elif [ -n "$best_gid_index" ]; then
            echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"
            export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}
            export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}
            export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}
            echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX"
          else
            echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
          fi
        else
          echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
        fi
      fi
      eval "vllm serve /mnt/models \
        --served-model-name "facebook/opt-125m" \
        --port 8001 \
        --disable-uvicorn-access-log \
        --enable-ssl-refresh \
        --ssl-certfile /var/run/kserve/tls/tls.crt \
        --ssl-keyfile /var/run/kserve/tls/tls.key \
        ${VLLM_ADDITIONAL_ARGS} \
        $@"
    - --
    env:
    - name: HOME
      value: /home
    - name: VLLM_LOGGING_LEVEL
      value: DEBUG
    - name: VLLM_CPU_KVCACHE_SPACE
      value: "1"
    - name: HF_HUB_CACHE
      value: /models
    image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.17.1
    imagePullPolicy: IfNotPresent
    # NOTE(review): liveness/readiness probe port 8000 while the container
    # port and startupProbe use 8001 — 8000 is the routing sidecar's port, so
    # these probes go through the proxy; presumably intentional, but confirm.
    livenessProbe:
      failureThreshold: 8
      httpGet:
        path: /health
        port: 8000
        scheme: HTTPS
      initialDelaySeconds: 180
      periodSeconds: 30
      successThreshold: 1
      timeoutSeconds: 30
    name: main
    ports:
    - containerPort: 8001
      protocol: TCP
    readinessProbe:
      failureThreshold: 3
      httpGet:
        path: /health
        port: 8000
        scheme: HTTPS
      initialDelaySeconds: 30
      periodSeconds: 10
      successThreshold: 1
      timeoutSeconds: 5
    resources:
      limits:
        cpu: "2"
        memory: 7Gi
      requests:
        cpu: 200m
        memory: 2Gi
    securityContext:
      allowPrivilegeEscalation: false
      capabilities:
        drop:
        - ALL
      readOnlyRootFilesystem: false
      runAsNonRoot: true
      runAsUser: 1000690000
      seccompProfile:
        type: RuntimeDefault
    startupProbe:
      failureThreshold: 60
      httpGet:
        path: /health
        port: 8001
        scheme: HTTPS
      periodSeconds: 10
      successThreshold: 1
      timeoutSeconds: 1
    terminationMessagePath: /dev/termination-log
    terminationMessagePolicy: FallbackToLogsOnError
    volumeMounts:
    - mountPath: /home
      name: home
    - mountPath: /dev/shm
      name: dshm
    - mountPath: /models
      name: model-cache
    - mountPath: /var/run/kserve/tls
      name: tls-certs
      readOnly: true
    - mountPath: /mnt/models
      name: kserve-provision-location
      readOnly: true
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access-xwjtj
      readOnly: true
  dnsPolicy: ClusterFirst
  enableServiceLinks: true
  imagePullSecrets:
  - name: default-dockercfg-lsh9l
  - name: router-with-refs-pd-test-kserve-dockercfg-gz97h
  initContainers:
  # Native sidecar (restartPolicy: Always): llm-d routing proxy on 8000.
  - command:
    - /app/pd-sidecar
    - --port=8000
    - --vllm-port=8001
    - --connector=nixlv2
    - --enable-ssrf-protection=true
    - --pool-group=inference.networking.x-k8s.io
    - --secure-proxy=true
    - --cert-path=/var/run/kserve/tls
    - --decoder-use-tls=true
    - --prefiller-use-tls=true
    env:
    - name: INFERENCE_POOL_NAMESPACE
      valueFrom:
        fieldRef:
          apiVersion: v1
          fieldPath: metadata.namespace
    - name: SSL_CERT_DIR
      value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
    - name: INFERENCE_POOL_NAME
      value: router-with-refs-pd-test-inference-pool
    image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 3
      httpGet:
        path: /health
        port: 8000
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      successThreshold: 1
      timeoutSeconds: 10
    name: llm-d-routing-sidecar
    ports:
    - containerPort: 8000
      protocol: TCP
    readinessProbe:
      failureThreshold: 10
      httpGet:
        path: /health
        port: 8000
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      successThreshold: 1
      timeoutSeconds: 5
    resources: {}
    restartPolicy: Always
    securityContext:
      allowPrivilegeEscalation: false
      capabilities:
        drop:
        - ALL
      readOnlyRootFilesystem: false
      # NOTE(review): runAsNonRoot is false here but true on the other
      # containers (UID is non-root regardless) — verify this is intended.
      runAsNonRoot: false
      runAsUser: 1000690000
    terminationMessagePath: /dev/termination-log
    terminationMessagePolicy: FallbackToLogsOnError
    volumeMounts:
    - mountPath: /var/run/kserve/tls
      name: tls-certs
      readOnly: true
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access-xwjtj
      readOnly: true
  # One-shot init container: downloads the model into /mnt/models.
  - args:
    - hf://facebook/opt-125m
    - /mnt/models
    env:
    - name: HF_HUB_ENABLE_HF_TRANSFER
      value: "1"
    - name: HF_XET_HIGH_PERFORMANCE
      value: "1"
    - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
      value: "8"
    image: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589
    imagePullPolicy: IfNotPresent
    name: storage-initializer
    resources:
      limits:
        cpu: "1"
        memory: 24Gi
      requests:
        cpu: 100m
        memory: 100Mi
    securityContext:
      allowPrivilegeEscalation: false
      capabilities:
        drop:
        - ALL
      runAsNonRoot: true
      runAsUser: 1000690000
    terminationMessagePath: /dev/termination-log
    terminationMessagePolicy: FallbackToLogsOnError
    volumeMounts:
    - mountPath: /mnt/models
      name: kserve-provision-location
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access-xwjtj
      readOnly: true
  nodeName: ip-10-0-129-3.ec2.internal
  preemptionPolicy: PreemptLowerPriority
  priority: 0
  restartPolicy: Always
  schedulerName: default-scheduler
  securityContext:
    fsGroup: 1000690000
    seLinuxOptions:
      level: s0:c26,c20
    seccompProfile:
      type: RuntimeDefault
  serviceAccount: router-with-refs-pd-test-kserve
  serviceAccountName: router-with-refs-pd-test-kserve
  terminationGracePeriodSeconds: 30
  tolerations:
  - effect: NoExecute
    key: node.kubernetes.io/not-ready
    operator: Exists
    tolerationSeconds: 300
  - effect: NoExecute
    key: node.kubernetes.io/unreachable
    operator: Exists
    tolerationSeconds: 300
  - effect: NoSchedule
    key: node.kubernetes.io/memory-pressure
    operator: Exists
  volumes:
  - emptyDir: {}
    name: home
  - emptyDir:
      medium: Memory
      sizeLimit: 1Gi
    name: dshm
  - emptyDir: {}
    name: model-cache
  - name: tls-certs
    secret:
      defaultMode: 420
      secretName: router-with-refs-pd-test-kserve-self-signed-certs
  - emptyDir: {}
    name: kserve-provision-location
  - name: kube-api-access-xwjtj
    projected:
      defaultMode: 420
      sources:
      - serviceAccountToken:
          expirationSeconds: 3607
          path: token
      - configMap:
          items:
          - key: ca.crt
            path: ca.crt
          name: kube-root-ca.crt
      - downwardAPI:
          items:
          - fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
            path: namespace
      - configMap:
          items:
          - key: service-ca.crt
            path: service-ca.crt
          name: openshift-service-ca.crt
status:
  conditions:
  - lastProbeTime: null
    lastTransitionTime: "2026-04-16T14:26:28Z"
    status: "True"
    type: PodReadyToStartContainers
  - lastProbeTime: null
    lastTransitionTime: "2026-04-16T14:26:33Z"
    status: "True"
    type: Initialized
  - lastProbeTime: null
    lastTransitionTime: "2026-04-16T14:29:07Z"
    status: "True"
    type: Ready
  - lastProbeTime: null
    lastTransitionTime: "2026-04-16T14:29:07Z"
    status: "True"
    type: ContainersReady
  - lastProbeTime: null
    lastTransitionTime: "2026-04-16T14:26:27Z"
    status: "True"
    type: PodScheduled
  containerStatuses:
  - allocatedResources:
      cpu: 200m
      memory: 2Gi
    containerID: cri-o://21760630dd46ed09ed07d7d1f26a946c9c40ec7c87726c516a548115251cb2a7
    image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.17.1
    imageID: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo@sha256:d19978a2d4bb2289c740a6c89d4cc15fbcf4d20d916f1e268168b8bbad3b776b
    lastState: {}
    name: main
    ready: true
    resources:
      limits:
        cpu: "2"
        memory: 7Gi
      requests:
        cpu: 200m
        memory: 2Gi
    restartCount: 0
    started: true
    state:
      running:
        startedAt: "2026-04-16T14:26:33Z"
    user:
      linux:
        gid: 0
        supplementalGroups:
        - 0
        - 1000690000
        uid: 1000690000
    volumeMounts:
    - mountPath: /home
      name: home
    - mountPath: /dev/shm
      name: dshm
    - mountPath: /models
      name: model-cache
    - mountPath: /var/run/kserve/tls
      name: tls-certs
      readOnly: true
      recursiveReadOnly: Disabled
    - mountPath: /mnt/models
      name: kserve-provision-location
      readOnly: true
      recursiveReadOnly: Disabled
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access-xwjtj
      readOnly: true
      recursiveReadOnly: Disabled
  hostIP: 10.0.129.3
  hostIPs:
  - ip: 10.0.129.3
  initContainerStatuses:
  - containerID: cri-o://c0ca5e3249cea969ec1829d8d646e3020ce684adcca1be6aa374138041f9a452
    image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1
    imageID: ghcr.io/llm-d/llm-d-routing-sidecar@sha256:14ff2530c83bd6f95fa5b25309b150623b403da83f9152f635858f02163e2f95
    lastState: {}
    name: llm-d-routing-sidecar
    ready: true
    resources: {}
    restartCount: 0
    started: true
    state:
      running:
        startedAt: "2026-04-16T14:26:27Z"
    user:
      linux:
        gid: 0
        supplementalGroups:
        - 0
        - 1000690000
        uid: 1000690000
    volumeMounts:
    - mountPath: /var/run/kserve/tls
      name: tls-certs
      readOnly: true
      recursiveReadOnly: Disabled
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access-xwjtj
      readOnly: true
      recursiveReadOnly: Disabled
  - allocatedResources:
      cpu: 100m
      memory: 100Mi
    containerID: cri-o://36e9be3ab6226725f4aa608c7c9217437015547a8335db28208350681746da80
    image: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589
    imageID: quay.io/opendatahub/kserve-storage-initializer@sha256:16546eaa530f8a9a22c60a7ffa82a496f33cd9dfcf4989c8baef0968e8392589
    lastState: {}
    name: storage-initializer
    ready: true
    resources:
      limits:
        cpu: "1"
        memory: 24Gi
      requests:
        cpu: 100m
        memory: 100Mi
    restartCount: 0
    started: false
    state:
      terminated:
        containerID: cri-o://36e9be3ab6226725f4aa608c7c9217437015547a8335db28208350681746da80
        exitCode: 0
        finishedAt: "2026-04-16T14:26:32Z"
        reason: Completed
        startedAt: "2026-04-16T14:26:28Z"
    user:
      linux:
        gid: 0
        supplementalGroups:
        - 0
        - 1000690000
        uid: 1000690000
    volumeMounts:
    - mountPath: /mnt/models
      name: kserve-provision-location
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access-xwjtj
      readOnly: true
      recursiveReadOnly: Disabled
  phase: Running
  podIP: 10.134.0.60
  podIPs:
  - ip: 10.134.0.60
  qosClass: Burstable
  startTime: "2026-04-16T14:26:27Z"