---
apiVersion: apps/v1
items:
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    # Rollout bookkeeping annotations; the managedFields entry of this object
    # lists kube-controller-manager as the writer of all three keys.
    # NOTE(review): max-replicas ("2") is higher than desired-replicas ("1");
    # presumably desired + surge of the owning Deployment - confirm there.
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:03:11Z"
    generation: 1
    # The same six labels appear in spec.selector.matchLabels and in the pod
    # template labels of this ReplicaSet.
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: auth-enabled-test
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: both
      pod-template-hash: 85d86d876c
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"bab81096-d0c8-40e1-bbc4-635e04522112"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"TORCHINDUCTOR_CACHE_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"USER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:03:11Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:03:11Z"
    # Name is the owning Deployment's name followed by the value of the
    # pod-template-hash label (85d86d876c).
    name: auth-enabled-test-kserve-85d86d876c
    namespace: kserve-ci-e2e-test
    # Single owner: the Deployment auth-enabled-test-kserve, flagged as the
    # controller of this ReplicaSet. Its uid is the same one keyed under
    # f:ownerReferences in managedFields.
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: auth-enabled-test-kserve
      uid: bab81096-d0c8-40e1-bbc4-635e04522112
    resourceVersion: "24189"
    uid: afbab033-b48f-4827-a683-4e1cc3932d27
  spec:
    # One replica requested; status.replicas of this object also reports 1.
    replicas: 1
    # The selector lists every label that the pod template sets, including
    # pod-template-hash, so it only matches pods of this template revision.
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: auth-enabled-test
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: both
        pod-template-hash: 85d86d876c
    template:
      # Pod labels; identical key/value pairs to spec.selector.matchLabels.
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: auth-enabled-test
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: both
          pod-template-hash: 85d86d876c
      spec:
        containers:
        # Start command of the "main" container: `/bin/bash -c <script> --`.
        # The inline script
        #   1. sources /etc/profile.d/ibm-aiu-setup.sh when that file exists,
        #   2. when KSERVE_INFER_ROCE=true, scans /sys/class/infiniband/mlx5_* and
        #      exports NCCL / NVSHMEM / UCX device lists and GID index defaults
        #      (values already present in the environment are never overwritten),
        #   3. picks access-log and shutdown-timeout flags from `vllm --version`,
        #   4. replaces the shell with `vllm serve /mnt/models ...` via eval/exec.
        # The trailing `--` element is the first argument after the script and so
        # becomes $0 of the inline script; any further arguments would be "$@".
        # NOTE(review): the double quotes around the two --served-model-name values
        # sit inside the double-quoted eval string, so they close and reopen that
        # string rather than quoting the names for eval. Harmless for names
        # without whitespace; confirm the generating template if that can change.
        - command:
          - /bin/bash
          - -c
          - |-
            if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
              source /etc/profile.d/ibm-aiu-setup.sh
            fi

            if [ "$KSERVE_INFER_ROCE" = "true" ]; then
              echo "Trying to infer RoCE configs ... "
              grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
              grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null

              cat /proc/driver/nvidia/params

              KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}

              echo "[Infer RoCE] Discovering active HCAs ..."
              active_hcas=()
              # Loop through all mlx5 devices found in sysfs
              for hca_dir in /sys/class/infiniband/mlx5_*; do
                  # Ensure it's a directory before proceeding
                  if [ -d "$hca_dir" ]; then
                      hca_name=$(basename "$hca_dir")
                      port_state_file="$hca_dir/ports/1/state" # Assume port 1
                      # Holds a glob pattern; it is expanded by the unquoted use in grep below
                      type_file="$hca_dir/ports/1/gid_attrs/types/*"

                      echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
                      if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
                          echo "[Infer RoCE] Found active HCA: $hca_name"
                          active_hcas+=("$hca_name")
                      else
                          echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
                      fi
                  fi
              done

              # Same devices in "<hca>:<port>" form (port 1 assumed, as above)
              ucx_hcas=()
              for hca in "${active_hcas[@]}"; do
                ucx_hcas+=("${hca}:1")
              done

              # Check if we found any active HCAs
              if [ ${#active_hcas[@]} -gt 0 ]; then
                  # Join the array elements with a comma
                  hcas=$(IFS=,; echo "${active_hcas[*]}")
                  # A bare ${ucx_hcas} expands to element 0 only, which would drop
                  # every HCA but the first; join the whole array instead.
                  ucx_hca_list=$(IFS=,; echo "${ucx_hcas[*]}")
                  echo "[Infer RoCE] Setting active HCAs: ${hcas}"
                  export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
                  export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hca_list}}
                  export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hca_list}}

                  echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
                  echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
                  echo "[Infer RoCE] UCX_NET_DEVICES=${UCX_NET_DEVICES}"
              else
                  echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
              fi

              if [ ${#active_hcas[@]} -gt 0 ]; then
                  echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."

                  # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
                  declare -A gid_index_count
                  declare -A hca_gid_index

                  for hca_name in "${active_hcas[@]}"; do
                      echo "[Infer RoCE] Processing HCA: ${hca_name}"

                      # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
                      for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
                          if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                              idx=$(basename "$tpath")
                              gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                              # Check for IPv4 GID (contains ffff:)
                              if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                                  hca_gid_index["${hca_name}"]="${idx}"
                                  gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1))
                                  break  # Use first found IPv4 GID per HCA
                              fi
                          fi
                      done
                  done

                  # Find the most common GID index (most likely to be consistent across nodes)
                  best_gid_index=""
                  max_count=0
                  for idx in "${!gid_index_count[@]}"; do
                      count=${gid_index_count["${idx}"]}
                      echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
                      if [ $count -gt $max_count ]; then
                          max_count=$count
                          best_gid_index="$idx"
                      fi
                  done

                  # Tie-break when more than one GID index was seen: if index 3 is
                  # among the most frequent ones, select index 3.
                  if [ ${#gid_index_count[@]} -gt 1 ]; then
                      echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
                      # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
                      if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
                          best_gid_index="3"
                          echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
                      fi
                  fi

                  # Check if GID_INDEX is already set via environment variables
                  if [ -n "${NCCL_IB_GID_INDEX}" ]; then
                      echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX"
                  elif [ -n "$best_gid_index" ]; then
                      echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"

                      export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}

                      echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX"
                  else
                      echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
                  fi
              else
                  echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
              fi
            fi

            # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011).
            # Older versions still need the blanket --disable-uvicorn-access-log.
            ACCESS_LOG_ARGS="--disable-uvicorn-access-log"
            VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}')
            echo "[access-log-detect] vllm version='${VLLM_VERSION}'"
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then
              ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping"
            fi
            echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'"

            # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666).
            SHUTDOWN_TIMEOUT_ARGS=""
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then
              SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40"
            fi

            eval "exec vllm serve /mnt/models \
              --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \
              --port 8000 \
              ${ACCESS_LOG_ARGS} \
              ${SHUTDOWN_TIMEOUT_ARGS} \
              --enable-ssl-refresh \
              --ssl-certfile /var/run/kserve/tls/tls.crt \
              --ssl-keyfile /var/run/kserve/tls/tls.key \
              ${VLLM_ADDITIONAL_ARGS} \
              $@"
          - --
          # Environment of the main container. The root filesystem is read-only
          # (securityContext.readOnlyRootFilesystem: true), and every path named
          # here lives on an emptyDir mount: /home (home), /tmp (tmp-dir) and
          # /models (model-cache).
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
          # Numeric-looking values are quoted so they stay strings.
          - name: VLLM_CPU_KVCACHE_SPACE
            value: "1"
          - name: VLLM_ENABLE_V1_MULTIPROCESSING
            value: "0"
          - name: USER
            value: nonroot
          - name: TORCHINDUCTOR_CACHE_DIR
            value: /tmp/torchinductor-cache
          - name: HF_HUB_CACHE
            value: /models
          # NOTE(review): the tag suggests vLLM 0.19.0; if `vllm --version` reports
          # that, both version gates in the start script (>= 0.16.0 and >= 0.18.0)
          # select the newer flags - confirm against the image.
          image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0
          imagePullPolicy: IfNotPresent
          # preStop sleeps 15s. Together with the `--shutdown-timeout 40` the start
          # script may pass to vLLM, that is 55s, within the pod's
          # terminationGracePeriodSeconds of 60.
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          # All probes use HTTPS on port 8000 because the start script launches
          # vLLM with --ssl-certfile/--ssl-keyfile on --port 8000.
          # Liveness: 3 consecutive failures at a 10s period, 10s timeout each.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: main
          ports:
          - containerPort: 8000
            protocol: TCP
          # Readiness: same endpoint, 60 failures at a 10s period, 5s timeout each.
          readinessProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Limits are well above requests (cpu 2 vs 200m, memory 7Gi vs 2Gi).
          resources:
            limits:
              cpu: "2"
              memory: 7Gi
            requests:
              cpu: 200m
              memory: 2Gi
          # Hardened container: no privilege escalation, all capabilities dropped,
          # non-root user, RuntimeDefault seccomp, and a read-only root filesystem.
          # Writable paths therefore come only from the volume mounts of this
          # container that are not marked readOnly.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup: 60 failures at a 10s period, i.e. up to 600s of probing.
          # NOTE(review): timeoutSeconds is 1 here, while the liveness and
          # readiness probes on the same HTTPS endpoint use 10 and 5 - confirm
          # that the short timeout is intended.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # Each mount name refers to an entry in the pod's `volumes` list.
          # /var/run/kserve/tls (secret) and /mnt/models are read-only here;
          # /mnt/models is the volume the storage-initializer init container
          # mounts without readOnly.
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /mnt/models
            name: kserve-provision-location
            readOnly: true
        dnsPolicy: ClusterFirst
        # Init container "storage-initializer": invoked with a source URI
        # (hf://facebook/opt-125m) and a destination directory (/mnt/models),
        # which is the kserve-provision-location volume that the main container
        # mounts read-only at the same path.
        initContainers:
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          # S3 credentials come from the secret seaweedfs-s3-creds; the endpoint
          # settings point at plain HTTP (S3_USE_HTTPS "0", S3_VERIFY_SSL "0").
          # NOTE(review): the source URI above uses the hf:// scheme, so these
          # S3/AWS settings are presumably unused for this model - confirm.
          # NOTE(review): AWS_CA_BUNDLE names /etc/ssl/custom-certs/cabundle.crt,
          # but this container's only volumeMount is /mnt/models - confirm the
          # bundle is expected to be absent here.
          env:
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # Memory limit (24Gi) is far above the request (100Mi).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        # 60s grace period; the main container's preStop hook sleeps 15s of it.
        terminationGracePeriodSeconds: 60
        # Every volume except tls-certs is an emptyDir.
        volumes:
        - emptyDir: {}
          name: home
        # Memory-backed emptyDir capped at 1Gi; the main container mounts it at
        # /dev/shm.
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        - emptyDir: {}
          name: tmp-dir
        # TLS material for the vLLM server, mounted at /var/run/kserve/tls.
        # defaultMode 420 is decimal, i.e. octal 0644.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: auth-enabled-test-kserve-self-signed-certs
        # Written by the storage-initializer init container at /mnt/models and
        # mounted read-only at the same path by the main container.
        - emptyDir: {}
          name: kserve-provision-location
  # Snapshot of controller-reported state: one replica exists and carries all
  # template labels. No readyReplicas or availableReplicas field is present.
  # NOTE(review): presumably the pod was not yet Ready when this was captured
  # (status time equals creationTimestamp in managedFields) - confirm.
  status:
    fullyLabeledReplicas: 1
    observedGeneration: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:03:11Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: auth-enabled-test
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 6c5d597fbb
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"21e6fa54-6782-4056-8977-4ca5d2d316f9"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models/base"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"STORAGE_ALLOW_PATTERNS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:03:11Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:03:43Z"
    name: auth-enabled-test-kserve-router-scheduler-6c5d597fbb
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: auth-enabled-test-kserve-router-scheduler
      uid: 21e6fa54-6782-4056-8977-4ca5d2d316f9
    resourceVersion: "24857"
    uid: eedb0e24-1784-41de-852e-01b08abb9f57
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: auth-enabled-test
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 6c5d597fbb
    template:
      metadata:
        # Pod labels: the same four key/value pairs that spec.selector.matchLabels
        # of this ReplicaSet selects on.
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: auth-enabled-test
          app.kubernetes.io/part-of: llminferenceservice
          # Quoted so a hash that happens to look numeric is never retyped.
          pod-template-hash: "6c5d597fbb"
        # Annotation values are strings; quoted to keep them from being
        # reinterpreted by a YAML loader.
        annotations:
          app.kubernetes.io/version: "0.7.0"
          certificates.kserve.io/expiration-v2: "true"
      spec:
        containers:
        # "main" container: the endpoint picker (/app/epp) of the router-scheduler.
        # Kubernetes runs `command` followed by `args`, so the --config-text payload
        # below is appended after the flags listed under `command`.
        - args:
          - --config-text
          # Literal EndpointPickerConfig document passed as a single argument.
          # It declares one scheduling profile ("default") that scores with
          # queue-scorer (weight 2) and prefix-cache-scorer (weight 3) and picks
          # with max-score-picker.
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: single-profile-handler
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            schedulingProfiles:
            - name: default
              plugins:
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          # Flag/value pairs are separate list items; their order is significant.
          - /app/epp
          - --pool-name
          - auth-enabled-test-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          # Port numbers are quoted because every command item must be a string.
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          # Same directory the tls-certs volume is mounted at (see volumeMounts).
          - --cert-path=/var/run/kserve/tls
          env:
          # Colon-separated list of certificate directories; the first entry is
          # the tls-certs mount path.
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          # gRPC health check against the --grpc-health-port (9003), service "liveness".
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          ports:
          # 9002/9003 match --grpc-port/--grpc-health-port above.
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          # NOTE(review): 9090 and 5557 are declared here but no flag in the
          # visible command sets them - presumably image defaults; confirm.
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          # Same gRPC health port as liveness, service "readiness", with a longer
          # initial delay (30s vs 5s).
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          # Requests only; no limits are set for this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Root filesystem is read-only, so any writable path must come from a
          # mounted volume.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          # tokenizer-uds is also mounted at /tmp/tokenizer in the "tokenizer"
          # container, giving both containers a shared directory.
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        # "tokenizer" sidecar container. It shares the tokenizer-uds volume with the
        # "main" container (both mount it at /tmp/tokenizer) and reads the files
        # fetched by the storage-initializer init container.
        - env:
          # Parent directory of the read-only model mount (/mnt/models/base below).
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          # All three probes hit GET /healthz on port 8082 over plain HTTP.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Requests only; no limits are set for this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Read-only root filesystem: the emptyDir mounts below provide the
          # writable paths (/tmp, /.cache, /tmp/tokenizer).
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup budget: 5s initial delay, then up to 60 failed checks spaced
          # 10s apart before the container is considered failed to start.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          # A separate volume nested under the /tmp mount; shared with "main".
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          # The init container mounts this same volume at /mnt/models, so what it
          # writes to /mnt/models appears here under /mnt/models/base (read-only).
          - mountPath: /mnt/models/base
            name: kserve-provision-location
            readOnly: true
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        initContainers:
        # storage-initializer init container: runs before the regular containers
        # with two positional arguments, a source URI and a destination directory.
        - args:
          # Source URI.
          - hf://facebook/opt-125m
          # Destination directory; this is where kserve-provision-location is mounted.
          - /mnt/models
          env:
          # S3 credentials are read from the seaweedfs-s3-creds Secret rather than
          # being inlined.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # Env values must be strings, hence the quoted "0"/"1"/"8" below.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # NOTE(review): this path is not under any volumeMount of this container
          # (only /mnt/models is mounted) and the pod's volumes list has no
          # ConfigMap volume - confirm the bundle file is not needed for the
          # hf:// source used here.
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # JSON array of file names, folded across two lines inside one quoted
          # scalar. Presumably restricts the download to tokenizer/config files -
          # confirm against the storage initializer.
          - name: STORAGE_ALLOW_PATTERNS
            value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json",
              "vocab.json", "merges.txt", "config.json", "generation_config.json"]'
          # Image is pinned by digest, not by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # Limits are far above requests (1 CPU / 24Gi vs 100m / 100Mi).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          # Writable here; the tokenizer container mounts the same volume
          # read-only at /mnt/models/base.
          - mountPath: /mnt/models
            name: kserve-provision-location
        # Pod-level settings shared by all containers of the router-scheduler pod.
        restartPolicy: Always
        schedulerName: default-scheduler
        # Empty pod-level security context; hardening is applied per container.
        securityContext: {}
        # serviceAccount is the deprecated alias of serviceAccountName; both carry
        # the same value here.
        serviceAccount: auth-enabled-test-epp-sa
        serviceAccountName: auth-enabled-test-epp-sa
        terminationGracePeriodSeconds: 30
        volumes:
        # Secret-backed certificate material, mounted read-only by the "main"
        # container at /var/run/kserve/tls. 420 is decimal for octal 0644.
        - name: tls-certs
          secret:
            secretName: auth-enabled-test-kserve-self-signed-certs
            defaultMode: 420
        # Directory shared between "main" and "tokenizer" (/tmp/tokenizer in both).
        - name: tokenizer-uds
          emptyDir: {}
        # Writable /tmp for the tokenizer container.
        - name: tokenizer-tmp
          emptyDir: {}
        # Writable /.cache for the tokenizer container.
        - name: tokenizer-cache
          emptyDir: {}
        # Filled by the storage-initializer init container, then read by the
        # tokenizer container.
        - name: kserve-provision-location
          emptyDir: {}
  # Observed state when this dump was taken: the one replica requested by
  # spec.replicas exists, carries all template labels, and is ready and available.
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:45:13Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: both
      pod-template-hash: 6dbc7ddb8d
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"1bfd7251-d9a8-44ba-bcbe-387027ab2792"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"TORCHINDUCTOR_CACHE_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"USER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"CA_BUNDLE_CONFIGMAP_NAME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"CA_BUNDLE_VOLUME_MOUNT_POINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/etc/ssl/custom-certs"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/mnt"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"cabundle-cert"}:
                  .: {}
                  f:configMap:
                    .: {}
                    f:defaultMode: {}
                    f:name: {}
                  f:name: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:45:13Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:45:13Z"
    name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-6dbc7ddb8d
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve
      uid: 1bfd7251-d9a8-44ba-bcbe-387027ab2792
    resourceVersion: "53927"
    uid: 2a8b4d6b-21fa-4755-bad0-3501d8b92cfb
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: both
        pod-template-hash: 6dbc7ddb8d
    template:
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: both
          pod-template-hash: 6dbc7ddb8d
      spec:
        containers:
        - args:
          - --enable-lora
          - --lora-modules
          - '''{"name":"lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}'''
          - '''{"name":"publishers/kserve-ci-e2e-test/models/lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}'''
          command:
          - /bin/bash
          - -c
          - |-
            if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
              source /etc/profile.d/ibm-aiu-setup.sh
            fi

            if [ "$KSERVE_INFER_ROCE" = "true" ]; then
              echo "Trying to infer RoCE configs ... "
              grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
              grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null

              cat /proc/driver/nvidia/params

              KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}

              echo "[Infer RoCE] Discovering active HCAs ..."
              active_hcas=()
              # Loop through all mlx5 devices found in sysfs
              for hca_dir in /sys/class/infiniband/mlx5_*; do
                  # Ensure it's a directory before proceeding
                  if [ -d "$hca_dir" ]; then
                      hca_name=$(basename "$hca_dir")
                      port_state_file="$hca_dir/ports/1/state" # Assume port 1
                      type_file="$hca_dir/ports/1/gid_attrs/types/*"

                      echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
                      if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
                          echo "[Infer RoCE] Found active HCA: $hca_name"
                          active_hcas+=("$hca_name")
                      else
                          echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
                      fi
                  fi
              done

              # Build the "<device>:<port>" spelling of every active HCA. Port 1 is
              # used because it is the port whose state was checked during discovery.
              ucx_hcas=()
              for hca in "${active_hcas[@]}"; do
                ucx_hcas+=("${hca}:1")
              done

              # Check if we found any active HCAs
              if [ ${#active_hcas[@]} -gt 0 ]; then
                  # Join the array elements with a comma
                  hcas=$(IFS=,; echo "${active_hcas[*]}")
                  # An unsubscripted ${ucx_hcas} expands to element 0 only, which
                  # would silently drop every HCA after the first one. Join the
                  # whole array so NVSHMEM and UCX see the same set as NCCL.
                  ucx_hca_list=$(IFS=,; echo "${ucx_hcas[*]}")
                  echo "[Infer RoCE] Setting active HCAs: ${hcas}"
                  # Values already present in the environment take precedence.
                  export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
                  export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hca_list}}
                  export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hca_list}}

                  echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
                  echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
                  echo "[Infer RoCE] UCX_NET_DEVICES=${UCX_NET_DEVICES}"
              else
                  echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
              fi

              if [ ${#active_hcas[@]} -gt 0 ]; then
                  echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."

                  # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
                  declare -A gid_index_count
                  declare -A hca_gid_index

                  for hca_name in "${active_hcas[@]}"; do
                      echo "[Infer RoCE] Processing HCA: ${hca_name}"

                      # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
                      for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
                          if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                              idx=$(basename "$tpath")
                              gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                              # Check for IPv4 GID (contains ffff:)
                              if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                                  hca_gid_index["${hca_name}"]="${idx}"
                                  gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1))
                                  break  # Use first found IPv4 GID per HCA
                              fi
                          fi
                      done
                  done

                  # Find the most common GID index (most likely to be consistent across nodes).
                  # Associative-array keys are iterated in an unspecified order, so a tie on the
                  # count is broken explicitly in favour of the lower index; this keeps the
                  # selection reproducible between runs and between nodes.
                  best_gid_index=""
                  max_count=0
                  for idx in "${!gid_index_count[@]}"; do
                      count=${gid_index_count["${idx}"]}
                      echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
                      # The tie branch is only reached once best_gid_index is set: every stored
                      # count is at least 1, so the first key always wins via -gt against 0.
                      if [ "$count" -gt "$max_count" ] || { [ "$count" -eq "$max_count" ] && [ "$idx" -lt "$best_gid_index" ]; }; then
                          max_count=$count
                          best_gid_index="$idx"
                      fi
                  done

                  # With several candidate indices, index 3 overrides the lower-index tie-break
                  # whenever it shares the highest count.
                  if [ ${#gid_index_count[@]} -gt 1 ]; then
                      echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
                      # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
                      if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
                          best_gid_index="3"
                          echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
                      fi
                  fi

                  # Resolve the GID index exported for NCCL, NVSHMEM and UCX.
                  #   1. A non-empty NCCL_IB_GID_INDEX from the environment wins over inference.
                  #   2. Otherwise the inferred best_gid_index is used, if there is one.
                  # In both cases NVSHMEM_IB_GID_INDEX and UCX_IB_GID_INDEX keep any non-empty
                  # value they already have, so the summary line reports the effective value of
                  # each variable instead of assuming all three are equal.
                  if [ -n "${NCCL_IB_GID_INDEX:-}" ]; then
                      echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
                      export NVSHMEM_IB_GID_INDEX="${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}"
                      export UCX_IB_GID_INDEX="${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}"
                      echo "[Infer RoCE] Effective GID_INDEX: NCCL=${NCCL_IB_GID_INDEX}, NVSHMEM=${NVSHMEM_IB_GID_INDEX}, UCX=${UCX_IB_GID_INDEX}"
                  elif [ -n "${best_gid_index:-}" ]; then
                      echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"

                      # NCCL_IB_GID_INDEX is known to be empty in this branch, so it always
                      # takes the inferred index.
                      export NCCL_IB_GID_INDEX="${best_gid_index}"
                      export NVSHMEM_IB_GID_INDEX="${NVSHMEM_IB_GID_INDEX:-$best_gid_index}"
                      export UCX_IB_GID_INDEX="${UCX_IB_GID_INDEX:-$best_gid_index}"

                      echo "[Infer RoCE] Exported GID_INDEX: NCCL=${NCCL_IB_GID_INDEX}, NVSHMEM=${NVSHMEM_IB_GID_INDEX}, UCX=${UCX_IB_GID_INDEX}"
                  else
                      # Best effort: the script continues without setting any GID index.
                      echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
                  fi
              else
                  echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
              fi
            fi

            # Derive version-dependent vLLM CLI flags from the last field of the last line
            # printed by `vllm --version`. When no version can be read, VLLM_VERSION is
            # empty and the conservative defaults below are kept.
            VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}')
            echo "[access-log-detect] vllm version='${VLLM_VERSION}'"

            # Succeeds when VLLM_VERSION starts with "<digits>.<digits>" and does not sort
            # before the minimum version given as $1 under `sort -V`.
            vllm_version_at_least() {
              [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] || return 1
              [ "$(printf '%s\n%s\n' "$1" "${VLLM_VERSION}" | sort -V | head -1)" = "$1" ]
            }

            # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011).
            # Older versions still need the blanket --disable-uvicorn-access-log.
            if vllm_version_at_least "0.16.0"; then
              ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping"
            else
              ACCESS_LOG_ARGS="--disable-uvicorn-access-log"
            fi
            echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'"

            # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666).
            if vllm_version_at_least "0.18.0"; then
              SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40"
            else
              SHUTDOWN_TIMEOUT_ARGS=""
            fi

            # Start the model server: `exec` replaces this shell with `vllm serve`, serving
            # /mnt/models on port 8000 with the TLS certificate and key from /var/run/kserve/tls.
            #
            # The command line is first assembled into a single string (backslash-newline
            # inside the double quotes joins the lines) and then re-parsed by `eval`, so
            # whitespace and any quoting inside ACCESS_LOG_ARGS, SHUTDOWN_TIMEOUT_ARGS,
            # VLLM_ADDITIONAL_ARGS and the positional parameters are interpreted by the shell.
            # The inner double quotes around the two served model names close and reopen the
            # outer string, so the names reach `eval` as plain, unquoted words.
            #
            # $@ is presumably whatever the container passes after the `--` entry that follows
            # this script in the command list - TODO confirm.
            # NOTE(review): eval executes everything VLLM_ADDITIONAL_ARGS and $@ contain,
            # including command substitutions - confirm these values only come from trusted
            # configuration.
            eval "exec vllm serve /mnt/models \
              --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \
              --port 8000 \
              ${ACCESS_LOG_ARGS} \
              ${SHUTDOWN_TIMEOUT_ARGS} \
              --enable-ssl-refresh \
              --ssl-certfile /var/run/kserve/tls/tls.crt \
              --ssl-keyfile /var/run/kserve/tls/tls.key \
              ${VLLM_ADDITIONAL_ARGS} \
              $@"
          - --
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
          - name: VLLM_CPU_KVCACHE_SPACE
            value: "1"
          - name: VLLM_ENABLE_V1_MULTIPROCESSING
            value: "0"
          - name: USER
            value: nonroot
          - name: TORCHINDUCTOR_CACHE_DIR
            value: /tmp/torchinductor-cache
          - name: HF_HUB_CACHE
            value: /models
          image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0
          imagePullPolicy: IfNotPresent
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: main
          ports:
          - containerPort: 8000
            protocol: TCP
          readinessProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            limits:
              cpu: "2"
              memory: 7Gi
            requests:
              cpu: 200m
              memory: 2Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /mnt
            name: kserve-provision-location
            readOnly: true
        dnsPolicy: ClusterFirst
        initContainers:
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          - hf://edbeeching/opt-125m-lora
          - /mnt/lora/lora-adapter-1
          env:
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          - name: CA_BUNDLE_CONFIGMAP_NAME
            value: odh-kserve-custom-ca-bundle
          - name: CA_BUNDLE_VOLUME_MOUNT_POINT
            value: /etc/ssl/custom-certs
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /mnt
            name: kserve-provision-location
          - mountPath: /etc/ssl/custom-certs
            name: cabundle-cert
            readOnly: true
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        terminationGracePeriodSeconds: 60
        volumes:
        - emptyDir: {}
          name: home
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        - emptyDir: {}
          name: tmp-dir
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisv3e414c2ba058a022dfd694dbcbac5b51-kserve-self-signed-certs
        - emptyDir: {}
          name: kserve-provision-location
        - configMap:
            defaultMode: 420
            name: odh-kserve-custom-ca-bundle
          name: cabundle-cert
  status:
    fullyLabeledReplicas: 1
    observedGeneration: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:45:13Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 7cdd64995b
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"0360faab-ce30-44dd-83be-0cb53b27c9f9"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models/base"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"STORAGE_ALLOW_PATTERNS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:45:13Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:45:45Z"
    name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-router-scheduler-7cdd64995b
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvc-model-fb-opt-125m-with-7ca60146-kserve-router-scheduler
      uid: 0360faab-ce30-44dd-83be-0cb53b27c9f9
    resourceVersion: "54713"
    uid: fcaa5ab9-e3b2-4235-bd26-1a7bd182a576
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 7cdd64995b
    template:
      metadata:
        annotations:
          app.kubernetes.io/version: 0.7.0
          certificates.kserve.io/expiration-v2: "true"
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-7ca60146
          app.kubernetes.io/part-of: llminferenceservice
          pod-template-hash: 7cdd64995b
      spec:
        containers:
        # Router-scheduler "main" container: runs /app/epp with the flags listed under
        # `command`, followed by the `args` below (Kubernetes appends args to command).
        - args:
          # Inline EndpointPickerConfig passed as text: a single scheduling profile
          # ("default") that weighs queue-scorer (2) and prefix-cache-scorer (3) and
          # selects with max-score-picker.
          - --config-text
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: single-profile-handler
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            schedulingProfiles:
            - name: default
              plugins:
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          # The pool name/namespace identify the inference pool this picker serves.
          # gRPC is configured on 9002 and gRPC health on 9003, matching the ports and
          # probes below. Secure serving and certificate reload are enabled, with
          # certificates read from the tls-certs mount at /var/run/kserve/tls.
          - /app/epp
          - --pool-name
          - llmisvc-model-fb-opt-125m-with-7ca60146-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          - --cert-path=/var/run/kserve/tls
          env:
          # Colon-separated list of certificate directories; the first entry is the
          # tls-certs volume mounted below. Presumably used as the trust store when the
          # picker talks HTTPS to the model servers - TODO confirm.
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          # NOTE(review): the image tag is v0.7.1 while the pod template annotation
          # app.kubernetes.io/version says 0.7.0 - confirm which one is authoritative.
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          # Liveness: gRPC health check on the health port, service name "liveness".
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          ports:
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          # Readiness: same gRPC health port, service name "readiness", first checked
          # after 30 seconds.
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          # Requests only; no resource limits are set for this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Hardened context: non-root, no privilege escalation, all capabilities
          # dropped, read-only root filesystem (writes are limited to mounted volumes).
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          # tokenizer-uds is an emptyDir that the tokenizer container mounts at the same
          # path; presumably it carries the Unix domain socket between the two - TODO confirm.
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        # Tokenizer sidecar container. It shares the tokenizer-uds volume with the "main"
        # container and exposes an HTTP health endpoint on port 8082.
        - env:
          # Same path as workingDir below. The model files themselves are mounted one
          # level deeper, at /mnt/models/base (see volumeMounts).
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          # All three probes call GET /healthz over plain HTTP on port 8082.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Requests only; no resource limits are set for this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Read-only root filesystem: the writable paths are the emptyDir mounts listed
          # under volumeMounts (/tmp, /.cache, /tmp/tokenizer).
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup budget: first check after 5 seconds, then up to 60 failed checks
          # 10 seconds apart before the container is considered failed to start.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          # Separate volume mounted inside the /tmp mount; also mounted by "main".
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          # Same volume the storage-initializer init container mounts at /mnt/models,
          # so what it downloads appears here under /mnt/models/base, read-only.
          - mountPath: /mnt/models/base
            name: kserve-provision-location
            readOnly: true
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        initContainers:
        # Init container that populates the kserve-provision-location volume before the
        # regular containers start.
        - args:
          # Presumably <source URI> followed by <destination path> - TODO confirm against
          # the storage-initializer's argument contract.
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials are read from the seaweedfs-s3-creds secret.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # The S3 endpoint is addressed over plain HTTP with TLS verification switched off.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # NOTE(review): this container mounts only /mnt/models and the pod's volume list
          # has no CA-bundle volume, so /etc/ssl/custom-certs/cabundle.crt is presumably
          # absent at runtime - confirm this is harmless for the hf:// download.
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # JSON list of file names; presumably restricts the download to tokenizer and
          # config files, which is all the tokenizer sidecar needs - TODO confirm.
          - name: STORAGE_ALLOW_PATTERNS
            value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json",
              "vocab.json", "merges.txt", "config.json", "generation_config.json"]'
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # Small requests with a much larger memory limit (100Mi requested, 24Gi allowed).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          # Writable mount of the volume that the tokenizer container later mounts
          # read-only at /mnt/models/base.
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        serviceAccount: llmisvc-model-fb-opt-125m-with-7ca60146-epp-sa
        serviceAccountName: llmisvc-model-fb-opt-125m-with-7ca60146-epp-sa
        terminationGracePeriodSeconds: 30
        volumes:
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisv3e414c2ba058a022dfd694dbcbac5b51-kserve-self-signed-certs
        - emptyDir: {}
          name: tokenizer-uds
        - emptyDir: {}
          name: tokenizer-tmp
        - emptyDir: {}
          name: tokenizer-cache
        - emptyDir: {}
          name: kserve-provision-location
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:59:59Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: both
      pod-template-hash: 766cc944c5
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"acadaa95-e45b-44bb-905a-66dc21d09de1"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"TORCHINDUCTOR_CACHE_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"USER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"CA_BUNDLE_CONFIGMAP_NAME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"CA_BUNDLE_VOLUME_MOUNT_POINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/etc/ssl/custom-certs"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/mnt"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"cabundle-cert"}:
                  .: {}
                  f:configMap:
                    .: {}
                    f:defaultMode: {}
                    f:name: {}
                  f:name: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:59:59Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T07:01:49Z"
    name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-766cc944c5
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve
      uid: acadaa95-e45b-44bb-905a-66dc21d09de1
    resourceVersion: "67997"
    uid: 32f9748e-052a-43d4-bd64-34af57930dcb
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: both
        pod-template-hash: 766cc944c5
    template:
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: both
          pod-template-hash: 766cc944c5
      spec:
        containers:
        - args:
          - --enable-lora
          - --lora-modules
          - '''{"name":"lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}'''
          - '''{"name":"publishers/kserve-ci-e2e-test/models/lora-adapter-1","path":"/mnt/lora/lora-adapter-1"}'''
          # Entrypoint for the vLLM container: `bash -c <script> --`.
          # The trailing "--" becomes $0 of the script, so the container `args`
          # reach the script as "$@" and are appended to the final `vllm serve`
          # command line, which is re-parsed by `eval` (this is why the LoRA
          # module args carry their own literal single quotes).
          # Script steps:
          #   1. source /etc/profile.d/ibm-aiu-setup.sh when it exists
          #   2. when KSERVE_INFER_ROCE=true, discover active mlx5 HCAs and a
          #      GID index and export them for NCCL, NVSHMEM and UCX
          #   3. pick access-log / shutdown-timeout flags from `vllm --version`
          #   4. exec `vllm serve` with TLS enabled on port 8000
          command:
          - /bin/bash
          - -c
          - |-
            if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
              source /etc/profile.d/ibm-aiu-setup.sh
            fi

            if [ "$KSERVE_INFER_ROCE" = "true" ]; then
              echo "Trying to infer RoCE configs ... "
              grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
              grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null

              cat /proc/driver/nvidia/params

              KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}

              echo "[Infer RoCE] Discovering active HCAs ..."
              active_hcas=()
              # Loop through all mlx5 devices found in sysfs
              for hca_dir in /sys/class/infiniband/mlx5_*; do
                  # Ensure it's a directory before proceeding
                  if [ -d "$hca_dir" ]; then
                      hca_name=$(basename "$hca_dir")
                      port_state_file="$hca_dir/ports/1/state" # Assume port 1
                      type_file="$hca_dir/ports/1/gid_attrs/types/*"

                      echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
                      if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
                          echo "[Infer RoCE] Found active HCA: $hca_name"
                          active_hcas+=("$hca_name")
                      else
                          echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
                      fi
                  fi
              done

              ucx_hcas=()
              for hca in "${active_hcas[@]}"; do
                ucx_hcas+=("${hca}:1")
              done

              # Check if we found any active HCAs
              if [ ${#active_hcas[@]} -gt 0 ]; then
                  # Join the array elements with a comma
                  hcas=$(IFS=,; echo "${active_hcas[*]}")
                  # Join the "<hca>:<port>" entries the same way. A bare ${ucx_hcas}
                  # would expand to the first array element only and silently drop
                  # every other HCA from NVSHMEM_HCA_LIST / UCX_NET_DEVICES.
                  ucx_hca_list=$(IFS=,; echo "${ucx_hcas[*]}")
                  echo "[Infer RoCE] Setting active HCAs: ${hcas}"
                  export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
                  export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hca_list}}
                  export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hca_list}}

                  echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
                  echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
                  echo "[Infer RoCE] UCX_NET_DEVICES=${UCX_NET_DEVICES}"
              else
                  echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
              fi

              if [ ${#active_hcas[@]} -gt 0 ]; then
                  echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."

                  # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
                  declare -A gid_index_count
                  declare -A hca_gid_index

                  for hca_name in "${active_hcas[@]}"; do
                      echo "[Infer RoCE] Processing HCA: ${hca_name}"

                      # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
                      for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
                          if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                              idx=$(basename "$tpath")
                              gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                              # Check for IPv4 GID (contains ffff:)
                              if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                                  hca_gid_index["${hca_name}"]="${idx}"
                                  gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1))
                                  break  # Use first found IPv4 GID per HCA
                              fi
                          fi
                      done
                  done

                  # Find the most common GID index (most likely to be consistent across nodes)
                  best_gid_index=""
                  max_count=0
                  for idx in "${!gid_index_count[@]}"; do
                      count=${gid_index_count["${idx}"]}
                      echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
                      if [ $count -gt $max_count ]; then
                          max_count=$count
                          best_gid_index="$idx"
                      fi
                  done

                  # Tie-break when several indices were seen: prefer index 3 if it
                  # reaches the maximum count, otherwise keep the winner found above
                  if [ ${#gid_index_count[@]} -gt 1 ]; then
                      echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
                      # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
                      if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
                          best_gid_index="3"
                          echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
                      fi
                  fi

                  # Check if GID_INDEX is already set via environment variables
                  if [ -n "${NCCL_IB_GID_INDEX}" ]; then
                      echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX"
                  elif [ -n "$best_gid_index" ]; then
                      echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"

                      export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}

                      echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX"
                  else
                      echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
                  fi
              else
                  echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
              fi
            fi

            # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011).
            # Older versions still need the blanket --disable-uvicorn-access-log.
            ACCESS_LOG_ARGS="--disable-uvicorn-access-log"
            VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}')
            echo "[access-log-detect] vllm version='${VLLM_VERSION}'"
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then
              ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping"
            fi
            echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'"

            # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666).
            SHUTDOWN_TIMEOUT_ARGS=""
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then
              SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40"
            fi

            eval "exec vllm serve /mnt/models \
              --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \
              --port 8000 \
              ${ACCESS_LOG_ARGS} \
              ${SHUTDOWN_TIMEOUT_ARGS} \
              --enable-ssl-refresh \
              --ssl-certfile /var/run/kserve/tls/tls.crt \
              --ssl-keyfile /var/run/kserve/tls/tls.key \
              ${VLLM_ADDITIONAL_ARGS} \
              $@"
          - --
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
          - name: VLLM_CPU_KVCACHE_SPACE
            value: "1"
          - name: VLLM_ENABLE_V1_MULTIPROCESSING
            value: "0"
          - name: USER
            value: nonroot
          - name: TORCHINDUCTOR_CACHE_DIR
            value: /tmp/torchinductor-cache
          - name: HF_HUB_CACHE
            value: /models
          image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0
          imagePullPolicy: IfNotPresent
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: main
          ports:
          - containerPort: 8000
            protocol: TCP
          readinessProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            limits:
              cpu: "2"
              memory: 7Gi
            requests:
              cpu: 200m
              memory: 2Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /mnt
            name: kserve-provision-location
            readOnly: true
        dnsPolicy: ClusterFirst
        # Init container that runs before `main`. It mounts the
        # kserve-provision-location volume read-write at /mnt; `main` mounts the
        # same volume read-only at /mnt and serves /mnt/models from it.
        initContainers:
        # NOTE(review): the args look like (source URI, destination path) pairs,
        # base model first and LoRA adapter second; confirm against the
        # storage-initializer CLI.
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          - hf://edbeeching/opt-125m-lora
          - /mnt/lora/lora-adapter-1
          env:
          # S3 credentials are read from the seaweedfs-s3-creds Secret.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # The S3 endpoint below is plain HTTP (see AWS_ENDPOINT_URL);
          # presumably "0" turns HTTPS and certificate verification off.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # CA bundle path lives under the cabundle-cert mount
          # (/etc/ssl/custom-certs) declared in volumeMounts below.
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          # Hugging Face download tuning knobs.
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          - name: CA_BUNDLE_CONFIGMAP_NAME
            value: odh-kserve-custom-ca-bundle
          - name: CA_BUNDLE_VOLUME_MOUNT_POINT
            value: /etc/ssl/custom-certs
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # Note the wide gap between the memory request (100Mi) and the
          # memory limit (24Gi).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          # Writable here so downloads can land under /mnt.
          - mountPath: /mnt
            name: kserve-provision-location
          - mountPath: /etc/ssl/custom-certs
            name: cabundle-cert
            readOnly: true
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        terminationGracePeriodSeconds: 60
        volumes:
        - emptyDir: {}
          name: home
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        - emptyDir: {}
          name: tmp-dir
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisv77ff2528d3e9b4972cd9335229fce9f0-kserve-self-signed-certs
        - emptyDir: {}
          name: kserve-provision-location
        - configMap:
            defaultMode: 420
            name: odh-kserve-custom-ca-bundle
          name: cabundle-cert
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:59:59Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 86f69d9999
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"6d628253-aec6-4d77-ba28-2a4ea7af3a3c"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models/base"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"STORAGE_ALLOW_PATTERNS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:59:59Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T07:00:31Z"
    name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-router-scheduler-86f69d9999
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvc-model-fb-opt-125m-with-ba4d693a-kserve-router-scheduler
      uid: 6d628253-aec6-4d77-ba28-2a4ea7af3a3c
    resourceVersion: "66956"
    uid: c7ec8574-3741-403b-a777-99db38917f8f
  # Desired state of the router-scheduler ReplicaSet: a single pod made of
  # - "main": runs /app/epp with an inline EndpointPickerConfig,
  # - "tokenizer": sidecar sharing the tokenizer-uds volume with "main",
  # - "storage-initializer": init container that fills kserve-provision-location.
  spec:
    replicas: 1
    # The selector uses the same four labels that the pod template sets below.
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 86f69d9999
    template:
      metadata:
        annotations:
          # NOTE(review): annotation says 0.7.0 while both llm-d images below are
          # tagged v0.7.1 - confirm whether the annotation is expected to track
          # the image tag.
          app.kubernetes.io/version: 0.7.0
          certificates.kserve.io/expiration-v2: "true"
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: llmisvc-model-fb-opt-125m-with-ba4d693a
          app.kubernetes.io/part-of: llminferenceservice
          pod-template-hash: 86f69d9999
      spec:
        containers:
        # --- Container "main" ---------------------------------------------------
        # The config passed via --config-text declares four plugins and one
        # scheduling profile ("default") that references queue-scorer (weight 2),
        # prefix-cache-scorer (weight 3) and max-score-picker (no weight given).
        # single-profile-handler is declared but not referenced by the profile.
        - args:
          - --config-text
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: single-profile-handler
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            schedulingProfiles:
            - name: default
              plugins:
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          # --grpc-port / --grpc-health-port match the "grpc" (9002) and
          # "grpc-health" (9003) containerPorts declared below; --cert-path is
          # the mount path of the tls-certs volume.
          - /app/epp
          - --pool-name
          - llmisvc-model-fb-opt-125m-with-ba4d693a-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          - --cert-path=/var/run/kserve/tls
          env:
          # Colon-separated list of three directories; the first one is the
          # tls-certs mount path.
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          # Liveness and readiness are both gRPC checks against port 9003; they
          # differ in the service name and in initialDelaySeconds (5 vs 30).
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          ports:
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          # 9090 and 5557 are declared here but no flag in `command` above sets
          # them explicitly.
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          # Requests only; no limits are set for this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          # Same volume and same path as in the tokenizer container below.
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        # --- Container "tokenizer" ----------------------------------------------
        - env:
          # NOTE(review): TOKENIZERS_DIR and workingDir are /mnt/models, but the
          # only volume mounted under that path in this container is
          # kserve-provision-location at /mnt/models/base (read-only) - confirm
          # the tokenizer is expected to look in the "base" subdirectory.
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          # All three probes of this container hit HTTP /healthz on port 8082
          # (the "health" containerPort).
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Up to 60 failed checks at 10s intervals are tolerated after the
          # initial 5s delay before startup is considered failed.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # The root filesystem is read-only (see securityContext), so the three
          # emptyDir mounts below are the writable paths; the model volume is
          # mounted read-only.
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          - mountPath: /mnt/models/base
            name: kserve-provision-location
            readOnly: true
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        initContainers:
        # --- Init container "storage-initializer" -------------------------------
        # Args are a source URI (hf://facebook/opt-125m) followed by the target
        # directory, which is where kserve-provision-location is mounted here.
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials come from the seaweedfs-s3-creds secret.
          # NOTE(review): the source URI above uses the hf:// scheme, so the
          # S3/AWS settings are presumably unused for this download - confirm.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # HTTPS and certificate verification are both switched off ("0") and
          # the endpoint URL is plain http, even though AWS_CA_BUNDLE is set.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # JSON array (stored as one string, wrapped over two lines) naming only
          # tokenizer/config files; no weight files are listed.
          - name: STORAGE_ALLOW_PATTERNS
            value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json",
              "vocab.json", "merges.txt", "config.json", "generation_config.json"]'
          # Pinned by digest, unlike the two tag-pinned images above.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # NOTE(review): memory limit (24Gi) is far above the request (100Mi),
          # and this container has no securityContext while both regular
          # containers set a restrictive one - confirm both are intended.
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        # Pod-level securityContext is empty; hardening is set per container.
        securityContext: {}
        # serviceAccount and serviceAccountName carry the same value.
        serviceAccount: llmisvc-model-fb-opt-125m-with-ba4d693a-epp-sa
        serviceAccountName: llmisvc-model-fb-opt-125m-with-ba4d693a-epp-sa
        terminationGracePeriodSeconds: 30
        volumes:
        # defaultMode is shown in decimal: 420 == 0644 octal.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisv77ff2528d3e9b4972cd9335229fce9f0-kserve-self-signed-certs
        # The remaining volumes are all emptyDir: tokenizer-uds is mounted by
        # "main" and "tokenizer"; kserve-provision-location is written by the
        # init container and mounted read-only by "tokenizer".
        - emptyDir: {}
          name: tokenizer-uds
        - emptyDir: {}
          name: tokenizer-tmp
        - emptyDir: {}
          name: tokenizer-cache
        - emptyDir: {}
          name: kserve-provision-location
  # Observed state at capture time: every replica counter equals
  # spec.replicas (1).
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
# ReplicaSet llmisvc-router-managed-test-llm-4b931143-kserve-66f88bc44d:
# controlled by the Deployment llmisvc-router-managed-test-llm-4b931143-kserve
# and running a single "main" container (llm-d-inference-sim) serving
# facebook/opt-125m over TLS on port 8000.
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:25:41Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: both
      pod-template-hash: 66f88bc44d
    # Field-ownership bookkeeping. Two entries, both from
    # kube-controller-manager: the first covers metadata and spec, the second
    # covers the status subresource.
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            # Same uid as the ownerReferences entry further down.
            k:{"uid":"462c0065-8494-4483-86c7-2e6ab2e545d7"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    # No f:name here, consistent with the unnamed port in
                    # spec.template.spec.containers[0].ports below.
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      # Identical to creationTimestamp above.
      time: "2026-06-15T06:25:41Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      # Status was last written 11 seconds after creation.
      time: "2026-06-15T06:25:52Z"
    # The name is the owning Deployment's name plus the pod-template-hash label.
    name: llmisvc-router-managed-test-llm-4b931143-kserve-66f88bc44d
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvc-router-managed-test-llm-4b931143-kserve
      uid: 462c0065-8494-4483-86c7-2e6ab2e545d7
    resourceVersion: "41065"
    uid: 3b204cae-4fb7-4b0e-a6e5-c2274d4de174
  spec:
    replicas: 1
    # The six selector labels are identical to the pod template labels below.
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: both
        pod-template-hash: 66f88bc44d
    template:
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: both
          pod-template-hash: 66f88bc44d
      spec:
        containers:
        # Arguments for /app/llm-d-inference-sim: listen on 8000 (the declared
        # containerPort and the probe port), model facebook/opt-125m, mode
        # "random", and TLS cert/key read from the tls-certs mount path.
        - args:
          - --port
          - "8000"
          - --model
          - facebook/opt-125m
          - --mode
          - random
          - --ssl-certfile
          - /var/run/kserve/tls/tls.crt
          - --ssl-keyfile
          - /var/run/kserve/tls/tls.key
          command:
          - /app/llm-d-inference-sim
          env:
          # HOME and HF_HUB_CACHE point at the /home and /models emptyDir mounts.
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: INFO
          - name: HF_HUB_CACHE
            value: /models
          image: ghcr.io/llm-d/llm-d-inference-sim:v0.8.2
          imagePullPolicy: IfNotPresent
          # preStop sleeps 15s; terminationGracePeriodSeconds below is 60.
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          # All three probes request HTTPS /health on port 8000. They differ in
          # failureThreshold (3 / 60 / 60) and timeoutSeconds (10 / 5 / 1) for
          # liveness / readiness / startup respectively.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: main
          ports:
          # This port has no name.
          - containerPort: 8000
            protocol: TCP
          readinessProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Memory request equals the limit (2Gi); CPU request (200m) is below
          # the limit (1).
          resources:
            limits:
              cpu: "1"
              memory: 2Gi
            requests:
              cpu: 200m
              memory: 2Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # NOTE(review): timeoutSeconds is 1 here versus 5 and 10 on the other
          # two probes of the same HTTPS endpoint - confirm this is intended.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # The root filesystem is read-only (see securityContext); the four
          # emptyDir mounts below are not marked readOnly, the TLS secret is.
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
        dnsPolicy: ClusterFirst
        restartPolicy: Always
        schedulerName: default-scheduler
        # Pod-level securityContext is empty; hardening is set on the container.
        securityContext: {}
        terminationGracePeriodSeconds: 60
        volumes:
        - emptyDir: {}
          name: home
        # Memory-backed emptyDir capped at 1Gi, mounted at /dev/shm.
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        - emptyDir: {}
          name: tmp-dir
        # defaultMode is shown in decimal: 420 == 0644 octal.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisvca2d2d7d499abb359505529ebe02c136-kserve-self-signed-certs
  # Observed state at capture time: every replica counter equals
  # spec.replicas (1) and observedGeneration equals metadata.generation (1).
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:06:39Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: both
      pod-template-hash: 7c5bd57d44
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"d9d19e2e-683b-47fc-916b-45c9bc224baa"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:06:39Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:06:49Z"
    name: llmisvc-router-managed-test-llm-5b1e8f15-kserve-7c5bd57d44
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvc-router-managed-test-llm-5b1e8f15-kserve
      uid: d9d19e2e-683b-47fc-916b-45c9bc224baa
    resourceVersion: "27613"
    uid: b6522ee7-6b00-47fc-8f8e-7b5f486604f6
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: both
        pod-template-hash: 7c5bd57d44
    template:
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: both
          pod-template-hash: 7c5bd57d44
      spec:
        containers:
        - args:
          - --port
          - "8000"
          - --model
          - facebook/opt-125m
          - --mode
          - random
          - --ssl-certfile
          - /var/run/kserve/tls/tls.crt
          - --ssl-keyfile
          - /var/run/kserve/tls/tls.key
          command:
          - /app/llm-d-inference-sim
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: INFO
          - name: HF_HUB_CACHE
            value: /models
          image: ghcr.io/llm-d/llm-d-inference-sim:v0.8.2
          imagePullPolicy: IfNotPresent
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: main
          ports:
          - containerPort: 8000
            protocol: TCP
          readinessProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            limits:
              cpu: "1"
              memory: 2Gi
            requests:
              cpu: 200m
              memory: 2Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
        dnsPolicy: ClusterFirst
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        terminationGracePeriodSeconds: 60
        volumes:
        - emptyDir: {}
          name: home
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        - emptyDir: {}
          name: tmp-dir
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisve55ae740357a3a31a27cdb8b66ffe20f-kserve-self-signed-certs
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:25:41Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 5597d7fd6
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"ecee695a-211a-459d-85d4-c556133b52d3"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:25:41Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:26:14Z"
    name: llmisvcca2d2d7d499abb359505529ebe02c136-kserve-router-scheduler-5597d7fd6
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvcca2d2d7d499abb359505529ebe02c136-kserve-router-scheduler
      uid: ecee695a-211a-459d-85d4-c556133b52d3
    resourceVersion: "41338"
    uid: d87575c6-df17-4fb6-b992-6d245459cce1
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 5597d7fd6
    template:
      metadata:
        annotations:
          app.kubernetes.io/version: 0.7.0
          certificates.kserve.io/expiration-v2: "true"
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: llmisvc-router-managed-test-llm-4b931143
          app.kubernetes.io/part-of: llminferenceservice
          pod-template-hash: 5597d7fd6
      spec:
        containers:
        - args:
          - --config-text
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: single-profile-handler
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            schedulingProfiles:
            - name: default
              plugins:
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          - /app/epp
          - --pool-name
          - llmisvc-router-managed-test-llm-4b931143-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          - --cert-path=/var/run/kserve/tls
          env:
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          ports:
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        - env:
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        serviceAccount: llmisvc-router-managed-test-llm-4b931143-epp-sa
        serviceAccountName: llmisvc-router-managed-test-llm-4b931143-epp-sa
        terminationGracePeriodSeconds: 30
        volumes:
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisvca2d2d7d499abb359505529ebe02c136-kserve-self-signed-certs
        - emptyDir: {}
          name: tokenizer-uds
        - emptyDir: {}
          name: tokenizer-tmp
        - emptyDir: {}
          name: tokenizer-cache
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:06:39Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 68b6785c7d
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"1692e624-f633-477f-917e-ebb5a2755d26"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:06:39Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:07:11Z"
    name: llmisvce55ae740357a3a31a27cdb8b66ffe20f-kserve-router-scheduler-68b6785c7d
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: llmisvce55ae740357a3a31a27cdb8b66ffe20f-kserve-router-scheduler
      uid: 1692e624-f633-477f-917e-ebb5a2755d26
    resourceVersion: "27855"
    uid: 0a54deb4-75ba-4b8b-8fb7-4a4fe96d4f69
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 68b6785c7d
    template:
      metadata:
        annotations:
          app.kubernetes.io/version: 0.7.0
          certificates.kserve.io/expiration-v2: "true"
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: llmisvc-router-managed-test-llm-5b1e8f15
          app.kubernetes.io/part-of: llminferenceservice
          pod-template-hash: 68b6785c7d
      spec:
        containers:
        - args:
          - --config-text
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: single-profile-handler
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            schedulingProfiles:
            - name: default
              plugins:
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          - /app/epp
          - --pool-name
          - llmisvc-router-managed-test-llm-5b1e8f15-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          - --cert-path=/var/run/kserve/tls
          env:
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          ports:
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        - env:
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        serviceAccount: llmisvc-router-managed-test-llm-5b1e8f15-epp-sa
        serviceAccountName: llmisvc-router-managed-test-llm-5b1e8f15-epp-sa
        terminationGracePeriodSeconds: 30
        volumes:
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: llmisve55ae740357a3a31a27cdb8b66ffe20f-kserve-self-signed-certs
        - emptyDir: {}
          name: tokenizer-uds
        - emptyDir: {}
          name: tokenizer-tmp
        - emptyDir: {}
          name: tokenizer-cache
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:01:44Z"
    generation: 1
    labels:
      gateway.istio.io/managed: istio.io-gateway-controller
      gateway.networking.k8s.io/gateway-name: router-gateway-1
      pod-template-hash: 75dcfd69c9
      service.istio.io/canonical-name: router-gateway-1-openshift-default
      service.istio.io/canonical-revision: latest
      sidecar.istio.io/inject: "false"
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:gateway.istio.io/managed: {}
            f:gateway.networking.k8s.io/gateway-name: {}
            f:pod-template-hash: {}
            f:service.istio.io/canonical-name: {}
            f:service.istio.io/canonical-revision: {}
            f:sidecar.istio.io/inject: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"e49577c3-0a5f-4ac3-978b-c174825ed7ea"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:istio.io/rev: {}
                f:prometheus.io/path: {}
                f:prometheus.io/port: {}
                f:prometheus.io/scrape: {}
              f:labels:
                .: {}
                f:gateway.istio.io/managed: {}
                f:gateway.networking.k8s.io/gateway-name: {}
                f:pod-template-hash: {}
                f:service.istio.io/canonical-name: {}
                f:service.istio.io/canonical-revision: {}
                f:sidecar.istio.io/inject: {}
            f:spec:
              f:containers:
                k:{"name":"istio-proxy"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"CA_ADDR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"GOMAXPROCS"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:resourceFieldRef: {}
                    k:{"name":"GOMEMLIMIT"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:resourceFieldRef: {}
                    k:{"name":"HOST_IP"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"INSTANCE_IP"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"ISTIO_CPU_LIMIT"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:resourceFieldRef: {}
                    k:{"name":"ISTIO_META_APP_CONTAINERS"}:
                      .: {}
                      f:name: {}
                    k:{"name":"ISTIO_META_CLUSTER_ID"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_INTERCEPTION_MODE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_MESH_ID"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_NODE_NAME"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"ISTIO_META_OWNER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_POD_PORTS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_WORKLOAD_NAME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"PILOT_CERT_PROVIDER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"POD_NAME"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"POD_NAMESPACE"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"PROXY_CONFIG"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"SERVICE_ACCOUNT"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"TRUST_DOMAIN"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":15020,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":15021,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":15090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:privileged: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsGroup: {}
                    f:runAsNonRoot: {}
                    f:runAsUser: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/etc/istio/pod"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/etc/istio/proxy"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/lib/istio/data"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/credential-uds"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/istio"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/tokens"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/workload-spiffe-credentials"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/workload-spiffe-uds"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:dnsPolicy: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext:
                .: {}
                f:sysctls: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"credential-socket"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"istio-data"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"istio-envoy"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                  f:name: {}
                k:{"name":"istio-podinfo"}:
                  .: {}
                  f:downwardAPI:
                    .: {}
                    f:defaultMode: {}
                    f:items: {}
                  f:name: {}
                k:{"name":"istio-token"}:
                  .: {}
                  f:name: {}
                  f:projected:
                    .: {}
                    f:defaultMode: {}
                    f:sources: {}
                k:{"name":"istiod-ca-cert"}:
                  .: {}
                  f:configMap:
                    .: {}
                    f:defaultMode: {}
                    f:name: {}
                  f:name: {}
                k:{"name":"workload-certs"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"workload-socket"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:01:44Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:02:34Z"
    name: router-gateway-1-openshift-default-75dcfd69c9
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-gateway-1-openshift-default
      uid: e49577c3-0a5f-4ac3-978b-c174825ed7ea
    resourceVersion: "23489"
    uid: 146ffe43-5265-40ae-82ba-6fab35f508a9
  spec:
    replicas: 1
    selector:
      matchLabels:
        gateway.networking.k8s.io/gateway-name: router-gateway-1
        pod-template-hash: 75dcfd69c9
    template:
      metadata:
        annotations:
          istio.io/rev: openshift-gateway
          prometheus.io/path: /stats/prometheus
          prometheus.io/port: "15020"
          prometheus.io/scrape: "true"
        labels:
          gateway.istio.io/managed: istio.io-gateway-controller
          gateway.networking.k8s.io/gateway-name: router-gateway-1
          pod-template-hash: 75dcfd69c9
          service.istio.io/canonical-name: router-gateway-1-openshift-default
          service.istio.io/canonical-revision: latest
          sidecar.istio.io/inject: "false"
      spec:
        containers:
        - args:
          - proxy
          - router
          - --domain
          - $(POD_NAMESPACE).svc.cluster.local
          - --proxyLogLevel
          - warning
          - --proxyComponentLogLevel
          - misc:error
          - --log_output_level
          - default:info
          env:
          - name: PILOT_CERT_PROVIDER
            value: istiod
          - name: CA_ADDR
            value: istiod-openshift-gateway.openshift-ingress.svc:15012
          - name: POD_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.name
          - name: POD_NAMESPACE
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.namespace
          - name: INSTANCE_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.podIP
          - name: SERVICE_ACCOUNT
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: spec.serviceAccountName
          - name: HOST_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.hostIP
          - name: ISTIO_CPU_LIMIT
            valueFrom:
              resourceFieldRef:
                divisor: "0"
                resource: limits.cpu
          - name: PROXY_CONFIG
            value: |
              {"discoveryAddress":"istiod-openshift-gateway.openshift-ingress.svc:15012","proxyHeaders":{"server":{"disabled":true},"envoyDebugHeaders":{"disabled":true},"metadataExchangeHeaders":{"mode":"IN_MESH"}}}
          - name: ISTIO_META_POD_PORTS
            value: '[]'
          - name: ISTIO_META_APP_CONTAINERS
          - name: GOMEMLIMIT
            valueFrom:
              resourceFieldRef:
                divisor: "0"
                resource: limits.memory
          - name: GOMAXPROCS
            valueFrom:
              resourceFieldRef:
                divisor: "0"
                resource: limits.cpu
          - name: ISTIO_META_CLUSTER_ID
            value: Kubernetes
          - name: ISTIO_META_NODE_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: spec.nodeName
          - name: ISTIO_META_INTERCEPTION_MODE
            value: REDIRECT
          - name: ISTIO_META_WORKLOAD_NAME
            value: router-gateway-1-openshift-default
          - name: ISTIO_META_OWNER
            value: kubernetes://apis/apps/v1/namespaces/kserve-ci-e2e-test/deployments/router-gateway-1-openshift-default
          - name: ISTIO_META_MESH_ID
            value: cluster.local
          - name: TRUST_DOMAIN
            value: cluster.local
          image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371
          imagePullPolicy: IfNotPresent
          name: istio-proxy
          ports:
          - containerPort: 15020
            name: metrics
            protocol: TCP
          - containerPort: 15021
            name: status-port
            protocol: TCP
          - containerPort: 15090
            name: http-envoy-prom
            protocol: TCP
          readinessProbe:
            failureThreshold: 4
            httpGet:
              path: /healthz/ready
              port: 15021
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            limits:
              cpu: "2"
              memory: 1Gi
            requests:
              cpu: 100m
              memory: 128Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            privileged: false
            readOnlyRootFilesystem: true
            runAsGroup: 1000709999
            runAsNonRoot: true
            runAsUser: 1000709999
          startupProbe:
            failureThreshold: 30
            httpGet:
              path: /healthz/ready
              port: 15021
              scheme: HTTP
            initialDelaySeconds: 1
            periodSeconds: 1
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
          - mountPath: /var/run/secrets/workload-spiffe-uds
            name: workload-socket
          - mountPath: /var/run/secrets/credential-uds
            name: credential-socket
          - mountPath: /var/run/secrets/workload-spiffe-credentials
            name: workload-certs
          - mountPath: /var/run/secrets/istio
            name: istiod-ca-cert
          - mountPath: /var/lib/istio/data
            name: istio-data
          - mountPath: /etc/istio/proxy
            name: istio-envoy
          - mountPath: /var/run/secrets/tokens
            name: istio-token
          - mountPath: /etc/istio/pod
            name: istio-podinfo
        dnsPolicy: ClusterFirst
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext:
          sysctls:
          - name: net.ipv4.ip_unprivileged_port_start
            value: "0"
        serviceAccount: router-gateway-1-openshift-default
        serviceAccountName: router-gateway-1-openshift-default
        terminationGracePeriodSeconds: 30
        volumes:
        - emptyDir: {}
          name: workload-socket
        - emptyDir: {}
          name: credential-socket
        - emptyDir: {}
          name: workload-certs
        - emptyDir:
            medium: Memory
          name: istio-envoy
        - emptyDir: {}
          name: istio-data
        - downwardAPI:
            defaultMode: 420
            items:
            - fieldRef:
                apiVersion: v1
                fieldPath: metadata.labels
              path: labels
            - fieldRef:
                apiVersion: v1
                fieldPath: metadata.annotations
              path: annotations
          name: istio-podinfo
        - name: istio-token
          projected:
            defaultMode: 420
            sources:
            - serviceAccountToken:
                audience: istio-ca
                expirationSeconds: 43200
                path: istio-token
        - configMap:
            defaultMode: 420
            name: openshift-gw-ca-root-cert
          name: istiod-ca-cert
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:52:11Z"
    generation: 1
    labels:
      gateway.istio.io/managed: istio.io-gateway-controller
      gateway.networking.k8s.io/gateway-name: router-gateway-2
      pod-template-hash: 78c98f6f4c
      service.istio.io/canonical-name: router-gateway-2-openshift-default
      service.istio.io/canonical-revision: latest
      sidecar.istio.io/inject: "false"
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:gateway.istio.io/managed: {}
            f:gateway.networking.k8s.io/gateway-name: {}
            f:pod-template-hash: {}
            f:service.istio.io/canonical-name: {}
            f:service.istio.io/canonical-revision: {}
            f:sidecar.istio.io/inject: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"ae8c28b7-9822-40a1-b777-886bf929a87c"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:istio.io/rev: {}
                f:prometheus.io/path: {}
                f:prometheus.io/port: {}
                f:prometheus.io/scrape: {}
              f:labels:
                .: {}
                f:gateway.istio.io/managed: {}
                f:gateway.networking.k8s.io/gateway-name: {}
                f:pod-template-hash: {}
                f:service.istio.io/canonical-name: {}
                f:service.istio.io/canonical-revision: {}
                f:sidecar.istio.io/inject: {}
            f:spec:
              f:containers:
                k:{"name":"istio-proxy"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"CA_ADDR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"GOMAXPROCS"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:resourceFieldRef: {}
                    k:{"name":"GOMEMLIMIT"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:resourceFieldRef: {}
                    k:{"name":"HOST_IP"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"INSTANCE_IP"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"ISTIO_CPU_LIMIT"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:resourceFieldRef: {}
                    k:{"name":"ISTIO_META_APP_CONTAINERS"}:
                      .: {}
                      f:name: {}
                    k:{"name":"ISTIO_META_CLUSTER_ID"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_INTERCEPTION_MODE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_MESH_ID"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_NODE_NAME"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"ISTIO_META_OWNER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_POD_PORTS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"ISTIO_META_WORKLOAD_NAME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"PILOT_CERT_PROVIDER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"POD_NAME"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"POD_NAMESPACE"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"PROXY_CONFIG"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"SERVICE_ACCOUNT"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"TRUST_DOMAIN"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":15020,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":15021,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":15090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:privileged: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsGroup: {}
                    f:runAsNonRoot: {}
                    f:runAsUser: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/etc/istio/pod"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/etc/istio/proxy"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/lib/istio/data"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/credential-uds"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/istio"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/tokens"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/workload-spiffe-credentials"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/secrets/workload-spiffe-uds"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:dnsPolicy: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext:
                .: {}
                f:sysctls: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"credential-socket"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"istio-data"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"istio-envoy"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                  f:name: {}
                k:{"name":"istio-podinfo"}:
                  .: {}
                  f:downwardAPI:
                    .: {}
                    f:defaultMode: {}
                    f:items: {}
                  f:name: {}
                k:{"name":"istio-token"}:
                  .: {}
                  f:name: {}
                  f:projected:
                    .: {}
                    f:defaultMode: {}
                    f:sources: {}
                k:{"name":"istiod-ca-cert"}:
                  .: {}
                  f:configMap:
                    .: {}
                    f:defaultMode: {}
                    f:name: {}
                  f:name: {}
                k:{"name":"workload-certs"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"workload-socket"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:52:11Z"
    # Field-ownership record for the status counters: written by
    # kube-controller-manager with an Update through the status subresource.
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:52:32Z"
    name: router-gateway-2-openshift-default-78c98f6f4c
    namespace: kserve-ci-e2e-test
    # This ReplicaSet is controlled by the Deployment
    # router-gateway-2-openshift-default (controller: true). The ReplicaSet
    # name is that Deployment name plus the pod-template-hash suffix 78c98f6f4c.
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-gateway-2-openshift-default
      uid: ae8c28b7-9822-40a1-b777-886bf929a87c
    resourceVersion: "60453"
    uid: 8ea3db84-7947-45ae-8a53-0cf00386234c
  spec:
    replicas: 1
    # Pods are selected by gateway name plus this ReplicaSet's
    # pod-template-hash; both labels are also set on the pod template below.
    selector:
      matchLabels:
        gateway.networking.k8s.io/gateway-name: router-gateway-2
        pod-template-hash: 78c98f6f4c
    template:
      # Pod-level metadata stamped onto every pod created from this template.
      metadata:
        annotations:
          istio.io/rev: openshift-gateway
          # Scrape hints: path /stats/prometheus on port 15020, which is the
          # containerPort named "metrics" in the container spec below.
          prometheus.io/path: /stats/prometheus
          prometheus.io/port: "15020"
          prometheus.io/scrape: "true"
        labels:
          gateway.istio.io/managed: istio.io-gateway-controller
          gateway.networking.k8s.io/gateway-name: router-gateway-2
          pod-template-hash: 78c98f6f4c
          service.istio.io/canonical-name: router-gateway-2-openshift-default
          service.istio.io/canonical-revision: latest
          # Quoted so the label value stays the string "false", not a boolean.
          sidecar.istio.io/inject: "false"
      spec:
        containers:
        # The only container in this pod template (named istio-proxy below).
        # $(POD_NAMESPACE) in the --domain argument is Kubernetes env-var
        # substitution; POD_NAMESPACE is defined in this container's env list
        # from metadata.namespace.
        - args:
          - proxy
          - router
          - --domain
          - $(POD_NAMESPACE).svc.cluster.local
          - --proxyLogLevel
          - warning
          - --proxyComponentLogLevel
          - misc:error
          - --log_output_level
          - default:info
          # Environment for the istio-proxy container. Entries fall into three
          # groups: fixed strings, downward-API fieldRefs (pod identity and
          # placement) and resourceFieldRefs (values derived from the
          # container's own resource limits).
          env:
          - name: PILOT_CERT_PROVIDER
            value: istiod
          # Same host:port as "discoveryAddress" inside PROXY_CONFIG below.
          - name: CA_ADDR
            value: istiod-openshift-gateway.openshift-ingress.svc:15012
          # Downward API: pod identity and placement.
          - name: POD_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.name
          # Also consumed by the --domain argument via $(POD_NAMESPACE).
          - name: POD_NAMESPACE
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.namespace
          - name: INSTANCE_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.podIP
          - name: SERVICE_ACCOUNT
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: spec.serviceAccountName
          - name: HOST_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.hostIP
          # Derived from this container's limits.cpu (set to "2" under
          # resources). NOTE(review): divisor "0" looks like the serialized
          # form of an unset divisor - confirm against the API defaulting rules.
          - name: ISTIO_CPU_LIMIT
            valueFrom:
              resourceFieldRef:
                divisor: "0"
                resource: limits.cpu
          # JSON document passed as a literal block scalar; the trailing
          # newline is part of the value because the scalar uses "|".
          - name: PROXY_CONFIG
            value: |
              {"discoveryAddress":"istiod-openshift-gateway.openshift-ingress.svc:15012","proxyHeaders":{"server":{"disabled":true},"envoyDebugHeaders":{"disabled":true},"metadataExchangeHeaders":{"mode":"IN_MESH"}}}
          - name: ISTIO_META_POD_PORTS
            value: '[]'
          # No value/valueFrom key is present for this entry in the dump.
          - name: ISTIO_META_APP_CONTAINERS
          # Go runtime knobs sourced from the container limits
          # (limits.memory and limits.cpu respectively).
          - name: GOMEMLIMIT
            valueFrom:
              resourceFieldRef:
                divisor: "0"
                resource: limits.memory
          - name: GOMAXPROCS
            valueFrom:
              resourceFieldRef:
                divisor: "0"
                resource: limits.cpu
          - name: ISTIO_META_CLUSTER_ID
            value: Kubernetes
          - name: ISTIO_META_NODE_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: spec.nodeName
          - name: ISTIO_META_INTERCEPTION_MODE
            value: REDIRECT
          # Workload name and owner path both reference the owning Deployment
          # router-gateway-2-openshift-default in namespace kserve-ci-e2e-test.
          - name: ISTIO_META_WORKLOAD_NAME
            value: router-gateway-2-openshift-default
          - name: ISTIO_META_OWNER
            value: kubernetes://apis/apps/v1/namespaces/kserve-ci-e2e-test/deployments/router-gateway-2-openshift-default
          - name: ISTIO_META_MESH_ID
            value: cluster.local
          - name: TRUST_DOMAIN
            value: cluster.local
          image: registry.redhat.io/openshift-service-mesh/istio-proxyv2-rhel9@sha256:40be785b9abecd641f3121855a066c0ea01aba66e1350f33d175f2351c54e371
          imagePullPolicy: IfNotPresent
          name: istio-proxy
          # Declared container ports: 15020 is the prometheus.io/port scrape
          # target from the pod annotations, 15021 is the port both probes hit.
          ports:
          - containerPort: 15020
            name: metrics
            protocol: TCP
          - containerPort: 15021
            name: status-port
            protocol: TCP
          - containerPort: 15090
            name: http-envoy-prom
            protocol: TCP
          # Readiness: HTTP GET /healthz/ready on 15021 (the "status-port"
          # containerPort) every 15s with a 1s timeout; 4 consecutive failures
          # mark the pod unready, a single success marks it ready again.
          readinessProbe:
            failureThreshold: 4
            httpGet:
              path: /healthz/ready
              port: 15021
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 1
          # Requests are lower than limits for both cpu and memory. The limits
          # also feed ISTIO_CPU_LIMIT, GOMAXPROCS (limits.cpu) and GOMEMLIMIT
          # (limits.memory) through resourceFieldRef entries in env.
          resources:
            limits:
              cpu: "2"
              memory: 1Gi
            requests:
              cpu: 100m
              memory: 128Mi
          # Locked-down container: no privilege escalation, every capability
          # dropped, non-privileged, read-only root filesystem, non-root user.
          # NOTE(review): the 1000709999 UID/GID is presumably assigned from the
          # namespace's OpenShift UID range - confirm.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            privileged: false
            readOnlyRootFilesystem: true
            runAsGroup: 1000709999
            runAsNonRoot: true
            runAsUser: 1000709999
          # Startup: same endpoint as the readiness probe, but polled every 1s
          # after a 1s initial delay; up to 30 failures are tolerated before
          # the container is treated as having failed to start.
          startupProbe:
            failureThreshold: 30
            httpGet:
              path: /healthz/ready
              port: 15021
              scheme: HTTP
            initialDelaySeconds: 1
            periodSeconds: 1
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          # Each mount name refers to an entry in the pod-level volumes list
          # below. None of these mounts sets readOnly.
          volumeMounts:
          - mountPath: /var/run/secrets/workload-spiffe-uds
            name: workload-socket
          - mountPath: /var/run/secrets/credential-uds
            name: credential-socket
          - mountPath: /var/run/secrets/workload-spiffe-credentials
            name: workload-certs
          # ConfigMap-backed volume (openshift-gw-ca-root-cert).
          - mountPath: /var/run/secrets/istio
            name: istiod-ca-cert
          - mountPath: /var/lib/istio/data
            name: istio-data
          # Memory-backed emptyDir.
          - mountPath: /etc/istio/proxy
            name: istio-envoy
          # Projected service-account token with audience istio-ca.
          - mountPath: /var/run/secrets/tokens
            name: istio-token
          # Downward-API files "labels" and "annotations".
          - mountPath: /etc/istio/pod
            name: istio-podinfo
        dnsPolicy: ClusterFirst
        restartPolicy: Always
        schedulerName: default-scheduler
        # Pod-level sysctl lowering the first unprivileged port to 0, so the
        # non-root container (all capabilities dropped) can still bind low ports.
        securityContext:
          sysctls:
          - name: net.ipv4.ip_unprivileged_port_start
            value: "0"
        serviceAccount: router-gateway-2-openshift-default
        serviceAccountName: router-gateway-2-openshift-default
        terminationGracePeriodSeconds: 30
        # Backing volumes for the istio-proxy volumeMounts above.
        # defaultMode 420 is decimal for octal 0644.
        volumes:
        - emptyDir: {}
          name: workload-socket
        - emptyDir: {}
          name: credential-socket
        - emptyDir: {}
          name: workload-certs
        # tmpfs-backed emptyDir (medium: Memory), mounted at /etc/istio/proxy.
        - emptyDir:
            medium: Memory
          name: istio-envoy
        - emptyDir: {}
          name: istio-data
        # Exposes the pod's own labels and annotations as files named
        # "labels" and "annotations".
        - downwardAPI:
            defaultMode: 420
            items:
            - fieldRef:
                apiVersion: v1
                fieldPath: metadata.labels
              path: labels
            - fieldRef:
                apiVersion: v1
                fieldPath: metadata.annotations
              path: annotations
          name: istio-podinfo
        # Projected service-account token: audience istio-ca,
        # expirationSeconds 43200 (12 hours), written to the file "istio-token".
        - name: istio-token
          projected:
            defaultMode: 420
            sources:
            - serviceAccountToken:
                audience: istio-ca
                expirationSeconds: 43200
                path: istio-token
        - configMap:
            defaultMode: 420
            name: openshift-gw-ca-root-cert
          name: istiod-ca-cert
  # Observed state at capture time: every counter equals spec.replicas (1),
  # i.e. the single desired pod exists, is fully labeled, ready and available.
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:52:26Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: router-with-refs-pd-test
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: decode
      pod-template-hash: 6f78896447
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"90b48961-4b3b-49cb-84c9-f28da175b27c"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"TORCHINDUCTOR_CACHE_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"USER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8001,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"llm-d-routing-sidecar"}:
                  .: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"INFERENCE_POOL_NAME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"INFERENCE_POOL_NAMESPACE"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:fieldRef: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources: {}
                  f:restartPolicy: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:52:26Z"
    # Field-ownership record for the status counters: written by
    # kube-controller-manager with an Update through the status subresource.
    # Its timestamp (06:54:36Z) is later than the spec/metadata entry above
    # (06:52:26Z, equal to creationTimestamp).
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:54:36Z"
    name: router-with-refs-pd-test-kserve-6f78896447
    namespace: kserve-ci-e2e-test
    # Controlled by the Deployment router-with-refs-pd-test-kserve. The uid
    # here is the same one recorded under f:ownerReferences in managedFields.
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-with-refs-pd-test-kserve
      uid: 90b48961-4b3b-49cb-84c9-f28da175b27c
    resourceVersion: "62196"
    uid: 3de390c7-64f1-433b-af14-e1d9f47e1412
  spec:
    replicas: 1
    # Selector uses the full label set of the pod template below, including
    # llm-d.ai/role: decode and this ReplicaSet's pod-template-hash.
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: router-with-refs-pd-test
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: decode
        pod-template-hash: 6f78896447
    template:
      # Pod labels; identical to the ReplicaSet's own metadata.labels and to
      # spec.selector.matchLabels. No pod annotations are set in this template.
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: router-with-refs-pd-test
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: decode
          pod-template-hash: 6f78896447
      spec:
        containers:
        - command:
          - /bin/bash
          - -c
          - |-
            if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
              source /etc/profile.d/ibm-aiu-setup.sh
            fi

            if [ "$KSERVE_INFER_ROCE" = "true" ]; then
              echo "Trying to infer RoCE configs ... "
              grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
              grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null

              cat /proc/driver/nvidia/params

              KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}

              echo "[Infer RoCE] Discovering active HCAs ..."
              active_hcas=()
              # Loop through all mlx5 devices found in sysfs
              for hca_dir in /sys/class/infiniband/mlx5_*; do
                  # Ensure it's a directory before proceeding
                  if [ -d "$hca_dir" ]; then
                      hca_name=$(basename "$hca_dir")
                      port_state_file="$hca_dir/ports/1/state" # Assume port 1
                      type_file="$hca_dir/ports/1/gid_attrs/types/*"

                      echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
                      if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
                          echo "[Infer RoCE] Found active HCA: $hca_name"
                          active_hcas+=("$hca_name")
                      else
                          echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
                      fi
                  fi
              done

              ucx_hcas=()
              for hca in "${active_hcas[@]}"; do
                ucx_hcas+=("${hca}:1")
              done

              # Check if we found any active HCAs
              if [ ${#active_hcas[@]} -gt 0 ]; then
                  # Join the array elements with a comma
                  hcas=$(IFS=,; echo "${active_hcas[*]}")
                  echo "[Infer RoCE] Setting active HCAs: ${hcas}"
                  export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
                  export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}
                  export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}

                  echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
                  echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
              else
                  echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
              fi

              if [ ${#active_hcas[@]} -gt 0 ]; then
                  echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."

                  # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
                  declare -A gid_index_count
                  declare -A hca_gid_index

                  for hca_name in "${active_hcas[@]}"; do
                      echo "[Infer RoCE] Processing HCA: ${hca_name}"

                      # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
                      for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
                          if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                              idx=$(basename "$tpath")
                              gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                              # Check for IPv4 GID (contains ffff:)
                              if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                                  hca_gid_index["${hca_name}"]="${idx}"
                                  gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]} + 1))
                                  break  # Use first found IPv4 GID per HCA
                              fi
                          fi
                      done
                  done

                  # Find the most common GID index (most likely to be consistent across nodes)
                  best_gid_index=""
                  max_count=0
                  for idx in "${!gid_index_count[@]}"; do
                      count=${gid_index_count["${idx}"]}
                      echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
                      if [ $count -gt $max_count ]; then
                          max_count=$count
                          best_gid_index="$idx"
                      fi
                  done

                  # Use deterministic fallback if counts are equal - prefer lower index number
                  if [ ${#gid_index_count[@]} -gt 1 ]; then
                      echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
                      # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
                      if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
                          best_gid_index="3"
                          echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
                      fi
                  fi

                  # Check if GID_INDEX is already set via environment variables
                  if [ -n "${NCCL_IB_GID_INDEX}" ]; then
                      echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX"
                  elif [ -n "$best_gid_index" ]; then
                      echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"

                      export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}

                      echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX"
                  else
                      echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
                  fi
              else
                  echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
              fi
            fi

            # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011).
            # Older versions still need the blanket --disable-uvicorn-access-log.
            ACCESS_LOG_ARGS="--disable-uvicorn-access-log"
            VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}')
            echo "[access-log-detect] vllm version='${VLLM_VERSION}'"
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then
              ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping"
            fi
            echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'"

            # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666).
            SHUTDOWN_TIMEOUT_ARGS=""
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then
              SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40"
            fi

            eval "exec vllm serve /mnt/models \
              --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \
              --port 8001 \
              ${ACCESS_LOG_ARGS} \
              ${SHUTDOWN_TIMEOUT_ARGS} \
              --enable-ssl-refresh \
              --ssl-certfile /var/run/kserve/tls/tls.crt \
              --ssl-keyfile /var/run/kserve/tls/tls.key \
              ${VLLM_ADDITIONAL_ARGS} \
              $@"
          - --
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
          - name: VLLM_CPU_KVCACHE_SPACE
            value: "1"
          - name: VLLM_ENABLE_V1_MULTIPROCESSING
            value: "0"
          - name: USER
            value: nonroot
          - name: TORCHINDUCTOR_CACHE_DIR
            value: /tmp/torchinductor-cache
          - name: HF_HUB_CACHE
            value: /models
          image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0
          imagePullPolicy: IfNotPresent
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          livenessProbe:
            failureThreshold: 8
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            initialDelaySeconds: 180
            periodSeconds: 30
            successThreshold: 1
            timeoutSeconds: 30
          name: main
          ports:
          - containerPort: 8001
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            limits:
              cpu: "2"
              memory: 7Gi
            requests:
              cpu: 200m
              memory: 2Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8001
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /mnt/models
            name: kserve-provision-location
            readOnly: true
        dnsPolicy: ClusterFirst
        initContainers:
        # Routing sidecar. It is listed under initContainers but sets
        # `restartPolicy: Always` (below), which is the Kubernetes native-sidecar
        # pattern: it starts before the regular containers and keeps running
        # alongside them instead of running to completion.
        - command:
          - /app/pd-sidecar
          # The sidecar's own port (8000) is also its containerPort and the target of
          # its probes below. The main container in this pod serves vLLM with
          # `--port 8001`, while its liveness/readiness probes target 8000.
          # NOTE(review): presumably the sidecar proxies 8000 -> 8001; flag semantics
          # are not shown in this file - confirm against the pd-sidecar docs.
          - --port=8000
          - --vllm-port=8001
          - --kv-connector=nixlv2
          - --enable-ssrf-protection=true
          - --pool-group=inference.networking.x-k8s.io
          - --secure-proxy=true
          # Same directory where the tls-certs secret is mounted (see volumeMounts).
          - --cert-path=/var/run/kserve/tls
          - --decoder-use-tls=true
          - --prefiller-use-tls=true
          env:
          # Downward API: resolves to the namespace the pod is running in.
          - name: INFERENCE_POOL_NAMESPACE
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.namespace
          # Colon-separated list of certificate directories; the first entry is the
          # mounted tls-certs secret.
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          - name: INFERENCE_POOL_NAME
            value: router-with-refs-pd-test-inference-pool
          image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.7.1
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: llm-d-routing-sidecar
          ports:
          - containerPort: 8000
            protocol: TCP
          readinessProbe:
            failureThreshold: 10
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # No CPU/memory requests or limits are set for the sidecar.
          resources: {}
          # Marks this init container as a long-running sidecar (see header comment).
          restartPolicy: Always
          # NOTE(review): unlike the main container in this pod, no seccompProfile is
          # set here - confirm whether that omission is intended.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
        # Model download init container. It has no restartPolicy, so it is a regular
        # init container that must finish before the main container starts.
        - args:
          # NOTE(review): presumably <source URI> followed by <destination directory>;
          # the destination matches the kserve-provision-location mount below.
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials are read from the seaweedfs-s3-creds secret rather than
          # being inlined in the manifest.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # The S3 endpoint is plain HTTP with HTTPS and certificate verification
          # switched off ("0"); values are quoted so they stay strings.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # NOTE(review): this container mounts only /mnt/models, so nothing visible
          # here provides /etc/ssl/custom-certs/cabundle.crt - confirm whether the
          # bundle is expected to exist (the source above is hf://, not S3).
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # NOTE(review): the memory limit (24Gi) is far above the request (100Mi), and
          # no container-level securityContext is set here, unlike the other
          # containers in this pod - confirm both are intended.
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          # Mounted writable here; the main container mounts the same volume with
          # readOnly: true.
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        serviceAccount: router-with-refs-pd-test-kserve
        serviceAccountName: router-with-refs-pd-test-kserve
        terminationGracePeriodSeconds: 60
        # Pod volumes. The containers in this pod set readOnlyRootFilesystem: true
        # where they declare a securityContext, so writable paths are provided as
        # emptyDir volumes.
        volumes:
        # Mounted at /home by the main container.
        - emptyDir: {}
          name: home
        # Mounted at /tmp by the main container.
        - emptyDir: {}
          name: tmp-dir
        # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        # Mounted at /models by the main container.
        - emptyDir: {}
          name: model-cache
        # TLS material mounted read-only at /var/run/kserve/tls.
        # defaultMode is shown in decimal: 420 == octal 0644.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: router-with-refs-pd-test-kserve-self-signed-certs
        # Shared model directory: mounted writable by storage-initializer and
        # read-only by the main container, both at /mnt/models.
        - emptyDir: {}
          name: kserve-provision-location
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:52:26Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload-prefill
      app.kubernetes.io/name: router-with-refs-pd-test
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: prefill
      pod-template-hash: 5fc8578dd5
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"726fb5c3-df02-4357-a03d-bfe9e0ab9d54"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"TORCHINDUCTOR_CACHE_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"USER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:52:26Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:55:16Z"
    name: router-with-refs-pd-test-kserve-prefill-5fc8578dd5
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-with-refs-pd-test-kserve-prefill
      uid: 726fb5c3-df02-4357-a03d-bfe9e0ab9d54
    resourceVersion: "63044"
    uid: b26d0305-15ed-46d8-987e-75acf4c15489
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload-prefill
        app.kubernetes.io/name: router-with-refs-pd-test
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: prefill
        pod-template-hash: 5fc8578dd5
    template:
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload-prefill
          app.kubernetes.io/name: router-with-refs-pd-test
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: prefill
          pod-template-hash: 5fc8578dd5
      spec:
        containers:
        - command:
          # Entrypoint: bash runs the inline script below. The script optionally
          # infers RoCE settings for NCCL/NVSHMEM/UCX from sysfs, selects vLLM flags
          # based on the installed vLLM version, and finally execs `vllm serve`.
          - /bin/bash
          - -c
          - |-
            if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
              source /etc/profile.d/ibm-aiu-setup.sh
            fi

            # RoCE inference is opt-in: it only runs when KSERVE_INFER_ROCE is "true".
            # Every variable exported below keeps a value that is already set in the
            # environment (the ${VAR:-default} form).
            if [ "$KSERVE_INFER_ROCE" = "true" ]; then
              echo "Trying to infer RoCE configs ... "
              grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
              grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null

              cat /proc/driver/nvidia/params

              KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}

              echo "[Infer RoCE] Discovering active HCAs ..."
              active_hcas=()
              # Loop through all mlx5 devices found in sysfs
              for hca_dir in /sys/class/infiniband/mlx5_*; do
                  # Ensure it's a directory before proceeding
                  if [ -d "$hca_dir" ]; then
                      hca_name=$(basename "$hca_dir")
                      port_state_file="$hca_dir/ports/1/state" # Assume port 1
                      # Holds a glob pattern; it is expanded where ${type_file} is used unquoted.
                      type_file="$hca_dir/ports/1/gid_attrs/types/*"

                      echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
                      if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
                          echo "[Infer RoCE] Found active HCA: $hca_name"
                          active_hcas+=("$hca_name")
                      else
                          echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
                      fi
                  fi
              done

              # NVSHMEM/UCX entries carry the port as "<hca>:<port>" (port 1 assumed, as above)
              ucx_hcas=()
              for hca in "${active_hcas[@]}"; do
                ucx_hcas+=("${hca}:1")
              done

              # Check if we found any active HCAs
              if [ ${#active_hcas[@]} -gt 0 ]; then
                  # Join the array elements with a comma
                  hcas=$(IFS=,; echo "${active_hcas[*]}")
                  # A bare ${ucx_hcas} expands to the first array element only, so the
                  # "<hca>:1" entries are joined explicitly to pass every active HCA.
                  ucx_hca_list=$(IFS=,; echo "${ucx_hcas[*]}")
                  echo "[Infer RoCE] Setting active HCAs: ${hcas}"
                  export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
                  export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hca_list}}
                  export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hca_list}}

                  echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
                  echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
                  echo "[Infer RoCE] UCX_NET_DEVICES=${UCX_NET_DEVICES}"
              else
                  echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
              fi

              if [ ${#active_hcas[@]} -gt 0 ]; then
                  echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."

                  # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
                  declare -A gid_index_count
                  declare -A hca_gid_index

                  for hca_name in "${active_hcas[@]}"; do
                      echo "[Infer RoCE] Processing HCA: ${hca_name}"

                      # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
                      for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
                          if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                              idx=$(basename "$tpath")
                              gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                              # Check for IPv4 GID (contains ffff:)
                              if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                                  hca_gid_index["${hca_name}"]="${idx}"
                                  # First hit for an index starts from 0
                                  gid_index_count["${idx}"]=$(( ${gid_index_count["${idx}"]:-0} + 1 ))
                                  break  # Use first found IPv4 GID per HCA
                              fi
                          fi
                      done
                  done

                  # Find the most common GID index (most likely to be consistent across nodes)
                  best_gid_index=""
                  max_count=0
                  for idx in "${!gid_index_count[@]}"; do
                      count=${gid_index_count["${idx}"]}
                      echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
                      if [ "$count" -gt "$max_count" ]; then
                          max_count=$count
                          best_gid_index="$idx"
                      fi
                  done

                  # Tie-break when more than one GID index was found: if index 3 shares the
                  # highest count it wins; otherwise the index picked by the loop above is kept.
                  if [ ${#gid_index_count[@]} -gt 1 ]; then
                      echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
                      # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
                      if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
                          best_gid_index="3"
                          echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
                      fi
                  fi

                  # Check if GID_INDEX is already set via environment variables
                  if [ -n "${NCCL_IB_GID_INDEX}" ]; then
                      echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX"
                  elif [ -n "$best_gid_index" ]; then
                      echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"

                      export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}

                      echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX"
                  else
                      echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
                  fi
              else
                  echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
              fi
            fi

            # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011).
            # Older versions still need the blanket --disable-uvicorn-access-log.
            ACCESS_LOG_ARGS="--disable-uvicorn-access-log"
            VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}')
            echo "[access-log-detect] vllm version='${VLLM_VERSION}'"
            # "installed >= minimum" check: sort -V puts the lower version first, so the
            # minimum sorting first means the installed version is at least that new.
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then
              ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping"
            fi
            echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'"

            # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666).
            SHUTDOWN_TIMEOUT_ARGS=""
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then
              SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40"
            fi

            eval "exec vllm serve /mnt/models \
              --served-model-name "facebook/opt-125m" \
              --port 8000 \
              ${ACCESS_LOG_ARGS} \
              ${SHUTDOWN_TIMEOUT_ARGS} \
              --enable-ssl-refresh \
              --ssl-certfile /var/run/kserve/tls/tls.crt \
              --ssl-keyfile /var/run/kserve/tls/tls.key \
              ${VLLM_ADDITIONAL_ARGS} \
              $@"
          # With `bash -c <script> --`, this argument becomes $0 of the script, so any
          # arguments appended after it are what the script sees as $@.
          - --
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
          - name: VLLM_CPU_KVCACHE_SPACE
            value: "1"
          - name: VLLM_ENABLE_V1_MULTIPROCESSING
            value: "0"
          - name: USER
            value: nonroot
          - name: TORCHINDUCTOR_CACHE_DIR
            value: /tmp/torchinductor-cache
          - name: HF_HUB_CACHE
            value: /models
          image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0
          imagePullPolicy: IfNotPresent
          # preStop hook: runs `/bin/sleep 15` before the container is stopped.
          # Together with the 40 s `--shutdown-timeout` the start-up script may
          # pass to vLLM, that is 55 s, under terminationGracePeriodSeconds: 60.
          # NOTE(review): presumably the sleep lets traffic drain before shutdown;
          # the intent is not stated in this file.
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          # Liveness: HTTPS GET /health on port 8000, first check after 180 s,
          # then every 30 s with a 30 s timeout; 8 consecutive failures
          # (8 x 30 s = 240 s) are tolerated before the container is restarted.
          livenessProbe:
            failureThreshold: 8
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            initialDelaySeconds: 180
            periodSeconds: 30
            successThreshold: 1
            timeoutSeconds: 30
          name: main
          # Port 8000 is the `--port` the start-up script passes to `vllm serve`
          # and the port all three probes target.
          ports:
          - containerPort: 8000
            protocol: TCP
          # Readiness: same HTTPS /health endpoint as liveness, but checked every
          # 10 s from 30 s after start with a 5 s timeout; 3 consecutive failures
          # mark the pod not ready.
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Requests are well below limits (200m / 2Gi requested, 2 CPU / 7Gi
          # allowed), so the container may burst up to the limits.
          resources:
            limits:
              cpu: "2"
              memory: 7Gi
            requests:
              cpu: 200m
              memory: 2Gi
          # Hardened container settings: no privilege escalation, every Linux
          # capability dropped, read-only root filesystem, non-root user and the
          # runtime's default seccomp profile. Writable paths therefore have to
          # come from the volume mounts declared for this container.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup: HTTPS GET /health on port 8000 every 10 s, up to 60 failures,
          # i.e. roughly 600 s for the server to come up. No initialDelaySeconds
          # is set.
          # NOTE(review): timeoutSeconds is 1 here, while the readiness and
          # liveness probes give the same endpoint 5 s and 30 s — confirm the
          # tighter timeout is intended.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          # The termination message is read from the file only (policy `File`);
          # the storage-initializer init container uses FallbackToLogsOnError.
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          # Mounts for container `main`. /home, /tmp, /dev/shm and /models are
          # writable; the TLS material and /mnt/models are mounted read-only.
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          # Directory holding the tls.crt / tls.key files passed to `vllm serve`
          # via --ssl-certfile / --ssl-keyfile in the start-up script.
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          # Same volume the storage-initializer init container mounts (writable)
          # at /mnt/models; `vllm serve /mnt/models` reads the model from here.
          - mountPath: /mnt/models
            name: kserve-provision-location
            readOnly: true
        dnsPolicy: ClusterFirst
        # Init container `storage-initializer`. Its only volume mount is the
        # kserve-provision-location volume at /mnt/models, the path the main
        # container later mounts read-only.
        # NOTE(review): unlike container `main`, no container-level
        # securityContext is set here — confirm this is intended.
        initContainers:
        # NOTE(review): the two args are presumably <source URI> and
        # <destination directory> — confirm against the initializer's CLI.
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials are taken from the `seaweedfs-s3-creds` Secret rather
          # than being inlined.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # The S3 endpoint is addressed over plain HTTP with TLS verification
          # switched off (S3_USE_HTTPS "0", http:// URL, S3_VERIFY_SSL "0").
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # NOTE(review): nothing in this pod spec mounts a volume at
          # /etc/ssl/custom-certs, so this path only resolves if the image itself
          # ships the file — confirm.
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # The memory limit (24Gi) is far above the request (100Mi).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        # 60 s budget for shutdown; the main container's preStop sleep (15 s)
        # is part of it.
        terminationGracePeriodSeconds: 60
        # Pod volumes. Everything except the TLS secret is an emptyDir, so the
        # contents do not outlive the pod.
        volumes:
        - emptyDir: {}
          name: home
        - emptyDir: {}
          name: tmp-dir
        # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        # defaultMode 420 is decimal for octal 0644.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: router-with-refs-pd-test-kserve-self-signed-certs
        # Shared between the storage-initializer init container and `main`.
        - emptyDir: {}
          name: kserve-provision-location
  # Controller-reported state at capture time: one replica exists, carries all
  # template labels, and is both ready and available.
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
# ReplicaSet backing the router scheduler of `router-with-refs-pd-test`
# (component label `llminferenceservice-router-scheduler`).
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    # deployment.kubernetes.io/* annotations: revision 1 of the owning
    # Deployment, 1 replica desired and at most 1 allowed.
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:52:27Z"
    generation: 1
    # The same four labels are used as the selector and as the pod template
    # labels in `spec`.
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: router-with-refs-pd-test
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 5f7487fdfb
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"913912b8-37a0-416c-a402-6894467656a9"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models/base"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"STORAGE_ALLOW_PATTERNS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:52:27Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:53:00Z"
    # Name is the owning Deployment's name plus the pod-template-hash label
    # value (5f7487fdfb).
    name: router-with-refs-pd-test-kserve-router-scheduler-5f7487fdfb
    namespace: kserve-ci-e2e-test
    # Controlled by the Deployment below; its uid is the same one recorded
    # under f:ownerReferences in managedFields.
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-with-refs-pd-test-kserve-router-scheduler
      uid: 913912b8-37a0-416c-a402-6894467656a9
    resourceVersion: "60858"
    uid: 0c8740dd-1b7a-4125-9d09-4e1cc9c1ff89
  spec:
    replicas: 1
    # Selector and pod template labels are identical, including the
    # pod-template-hash that ties pods to this specific ReplicaSet.
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: router-with-refs-pd-test
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 5f7487fdfb
    template:
      metadata:
        # NOTE(review): the version annotation says 0.7.0 while both container
        # images in this template are tagged v0.7.1 — confirm which is right.
        annotations:
          app.kubernetes.io/version: 0.7.0
          certificates.kserve.io/expiration-v2: "true"
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: router-with-refs-pd-test
          app.kubernetes.io/part-of: llminferenceservice
          pod-template-hash: 5f7487fdfb
      spec:
        containers:
        # Container `main`: runs `/app/epp`. Kubernetes appends `args` to
        # `command`, so the process receives the flags listed under `command`
        # followed by `--config-text` and the inline configuration.
        - args:
          # The configuration is one literal string (an EndpointPickerConfig).
          # It declares the plugins and two scheduling profiles, `prefill` and
          # `decode`; each profile chains its own filter, queue-scorer
          # (weight 2), prefix-cache-scorer (weight 3) and max-score-picker.
          - --config-text
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: disagg-headers-handler
            - type: prefill-filter
            - type: decode-filter
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            - type: always-disagg-pd-decider
            - parameters:
                deciders:
                  prefill: always-disagg-pd-decider
              type: disagg-profile-handler
            schedulingProfiles:
            - name: prefill
              plugins:
              - pluginRef: prefill-filter
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
            - name: decode
              plugins:
              - pluginRef: decode-filter
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          # The pool name/namespace identify the inference pool to serve. The
          # gRPC ports (9002 serving, 9003 health) match the containerPorts
          # below, and the TLS flags point at the tls-certs mount.
          - /app/epp
          - --pool-name
          - router-with-refs-pd-test-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          - --cert-path=/var/run/kserve/tls
          env:
          # Colon-separated list of certificate directories; the first entry is
          # the tls-certs mount of this container.
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          # Liveness: gRPC health check on port 9003, service `liveness`, from
          # 5 s after start, every 10 s, 1 s timeout, 3 failures tolerated.
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          ports:
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          # Readiness: same gRPC health port, service `readiness`, first check
          # after 30 s.
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          # Only requests are declared; there are no limits on this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Hardened settings: no privilege escalation, all capabilities
          # dropped, read-only root filesystem, non-root, RuntimeDefault seccomp.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          # The tokenizer-uds volume is also mounted at the same path by the
          # `tokenizer` container, giving both containers a shared directory.
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        # Container `tokenizer`: sidecar exposing an HTTP health endpoint on
        # port 8082 and sharing the tokenizer-uds volume with `main`.
        - env:
          # NOTE(review): TOKENIZERS_DIR and workingDir are /mnt/models, while
          # the model volume is mounted one level down at /mnt/models/base —
          # presumably `base` is looked up as a sub-directory; confirm.
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          # Liveness: plain-HTTP GET /healthz on 8082 every 15 s, 5 s timeout.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          # Readiness: same endpoint, checked every 10 s.
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Only requests are declared; there are no limits on this container.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Same hardened settings as container `main`.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup: first check after 5 s, then every 10 s for up to 60
          # failures (about 600 s) before the container is considered failed.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # Writable scratch space (/tmp, /.cache) comes from emptyDir volumes
          # because the root filesystem is read-only. /tmp/tokenizer is a
          # separate volume nested under the /tmp mount.
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          - mountPath: /mnt/models/base
            name: kserve-provision-location
            readOnly: true
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        # Init container `storage-initializer`. It mounts the
        # kserve-provision-location volume at /mnt/models; the `tokenizer`
        # container mounts the same volume read-only at /mnt/models/base.
        # NOTE(review): no container-level securityContext is set here, unlike
        # the two regular containers — confirm this is intended.
        initContainers:
        # NOTE(review): the two args are presumably <source URI> and
        # <destination directory> — confirm against the initializer's CLI.
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials come from the `seaweedfs-s3-creds` Secret.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # Plain-HTTP S3 endpoint with TLS verification switched off.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # NOTE(review): nothing in this pod spec mounts a volume at
          # /etc/ssl/custom-certs, so this path only resolves if the image itself
          # ships the file — confirm.
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # JSON array, stored as one single-quoted string folded over two lines,
          # naming tokenizer and config files only (no weight files are listed).
          # NOTE(review): presumably restricts the download to these files.
          - name: STORAGE_ALLOW_PATTERNS
            value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json",
              "vocab.json", "merges.txt", "config.json", "generation_config.json"]'
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # The memory limit (24Gi) is far above the request (100Mi).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        # Both the legacy `serviceAccount` field and `serviceAccountName` carry
        # the same value.
        serviceAccount: router-with-refs-pd-test-epp-sa
        serviceAccountName: router-with-refs-pd-test-epp-sa
        terminationGracePeriodSeconds: 30
        # Pod volumes: the TLS secret plus four emptyDir volumes whose contents
        # do not outlive the pod.
        volumes:
        # defaultMode 420 is decimal for octal 0644.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: router-with-refs-pd-test-kserve-self-signed-certs
        # Shared by `main` and `tokenizer`, both at /tmp/tokenizer.
        - emptyDir: {}
          name: tokenizer-uds
        - emptyDir: {}
          name: tokenizer-tmp
        - emptyDir: {}
          name: tokenizer-cache
        # Written by the init container, read by `tokenizer`.
        - emptyDir: {}
          name: kserve-provision-location
  # Controller-reported state at capture time: the single desired replica
  # exists, carries all template labels, and is ready and available.
  # observedGeneration matches metadata.generation (1).
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
# ReplicaSet for the workload pods of `router-with-refs-test`
# (component label `llminferenceservice-workload`).
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    # Revision 1 of the owning Deployment; 1 replica desired, at most 2 allowed.
    # NOTE(review): the extra replica is presumably rollout surge headroom.
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "2"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:30:12Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-workload
      app.kubernetes.io/name: router-with-refs-test
      app.kubernetes.io/part-of: llminferenceservice
      kserve.io/component: workload
      llm-d.ai/role: both
      pod-template-hash: 578d595fc
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:kserve.io/component: {}
            f:llm-d.ai/role: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"e8712d19-620c-43db-9ffb-79505eadcfbc"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:kserve.io/component: {}
                f:llm-d.ai/role: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"HF_HUB_CACHE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HOME"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"TORCHINDUCTOR_CACHE_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"USER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_CPU_KVCACHE_SPACE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_ENABLE_V1_MULTIPROCESSING"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"VLLM_LOGGING_LEVEL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:lifecycle:
                    .: {}
                    f:preStop:
                      .: {}
                      f:exec:
                        .: {}
                        f:command: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8000,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/dev/shm"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/home"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"dshm"}:
                  .: {}
                  f:emptyDir:
                    .: {}
                    f:medium: {}
                    f:sizeLimit: {}
                  f:name: {}
                k:{"name":"home"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"model-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tmp-dir"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:30:12Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:32:22Z"
    name: router-with-refs-test-kserve-578d595fc
    namespace: kserve-ci-e2e-test
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-with-refs-test-kserve
      uid: e8712d19-620c-43db-9ffb-79505eadcfbc
    resourceVersion: "45745"
    uid: 0d603bab-21f2-4f77-87e7-1041dbaae626
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-workload
        app.kubernetes.io/name: router-with-refs-test
        app.kubernetes.io/part-of: llminferenceservice
        kserve.io/component: workload
        llm-d.ai/role: both
        pod-template-hash: 578d595fc
    template:
      metadata:
        labels:
          app.kubernetes.io/component: llminferenceservice-workload
          app.kubernetes.io/name: router-with-refs-test
          app.kubernetes.io/part-of: llminferenceservice
          kserve.io/component: workload
          llm-d.ai/role: both
          pod-template-hash: 578d595fc
      spec:
        containers:
        # Entrypoint wrapper for the vLLM container: optionally infers
        # RoCE/InfiniBand settings from sysfs (when KSERVE_INFER_ROCE=true),
        # selects version-dependent vLLM flags, then execs `vllm serve`.
        - command:
          - /bin/bash
          - -c
          - |-
            if [ -f /etc/profile.d/ibm-aiu-setup.sh ]; then
              source /etc/profile.d/ibm-aiu-setup.sh
            fi

            if [ "$KSERVE_INFER_ROCE" = "true" ]; then
              echo "Trying to infer RoCE configs ... "
              grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null
              grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null

              cat /proc/driver/nvidia/params

              KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-"RoCE v2"}

              echo "[Infer RoCE] Discovering active HCAs ..."
              active_hcas=()
              # Loop through all mlx5 devices found in sysfs
              for hca_dir in /sys/class/infiniband/mlx5_*; do
                  # Ensure it's a directory before proceeding
                  if [ -d "$hca_dir" ]; then
                      hca_name=$(basename "$hca_dir")
                      port_state_file="$hca_dir/ports/1/state" # Assume port 1
                      # Kept as an unexpanded glob; it is expanded by the unquoted use below.
                      type_file="$hca_dir/ports/1/gid_attrs/types/*"

                      echo "[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'"
                      if [ -f "$port_state_file" ] && grep -q "ACTIVE" "$port_state_file" && grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" ${type_file} 2>/dev/null; then
                          echo "[Infer RoCE] Found active HCA: $hca_name"
                          active_hcas+=("$hca_name")
                      else
                          echo "[Infer RoCE] Skipping inactive or down HCA: $hca_name"
                      fi
                  fi
              done

              # Same devices in "<hca>:<port>" form (port 1 assumed, as above)
              ucx_hcas=()
              for hca in "${active_hcas[@]}"; do
                ucx_hcas+=("${hca}:1")
              done

              # Check if we found any active HCAs
              if [ ${#active_hcas[@]} -gt 0 ]; then
                  # Join the array elements with a comma
                  hcas=$(IFS=,; echo "${active_hcas[*]}")
                  # A bare ${ucx_hcas} expands to element 0 only, which would drop
                  # every HCA but the first; join the whole array instead.
                  ucx_hca_list=$(IFS=,; echo "${ucx_hcas[*]}")
                  echo "[Infer RoCE] Setting active HCAs: ${hcas}"
                  export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}
                  export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hca_list}}
                  export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hca_list}}

                  echo "[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}"
                  echo "[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}"
                  echo "[Infer RoCE] UCX_NET_DEVICES=${UCX_NET_DEVICES}"
              else
                  echo "[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set."
              fi

              if [ ${#active_hcas[@]} -gt 0 ]; then
                  echo "[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)..."

                  # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs
                  declare -A gid_index_count
                  declare -A hca_gid_index

                  for hca_name in "${active_hcas[@]}"; do
                      echo "[Infer RoCE] Processing HCA: ${hca_name}"

                      # Find all RoCE v2 IPv4 GIDs for this HCA and count by index
                      for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do
                          if grep -q "${KSERVE_INFER_IB_GID_INDEX_GREP}" "$tpath" 2>/dev/null; then
                              idx=$(basename "$tpath")
                              gid_file="/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}"
                              # Check for IPv4 GID (contains ffff:)
                              if [ -f "$gid_file" ] && grep -q "ffff:" "$gid_file"; then
                                  gid_value=$(cat "$gid_file" 2>/dev/null || echo "")
                                  echo "[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}"
                                  hca_gid_index["${hca_name}"]="${idx}"
                                  # Default to 0 explicitly on the first hit for this index
                                  gid_index_count["${idx}"]=$((${gid_index_count["${idx}"]:-0} + 1))
                                  break  # Use first found IPv4 GID per HCA
                              fi
                          fi
                      done
                  done

                  # Find the most common GID index (most likely to be consistent across nodes)
                  best_gid_index=""
                  max_count=0
                  for idx in "${!gid_index_count[@]}"; do
                      count=${gid_index_count["${idx}"]}
                      echo "[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs"
                      if [ $count -gt $max_count ]; then
                          max_count=$count
                          best_gid_index="$idx"
                      fi
                  done

                  # Deterministic tie-break: when several indices were seen and index 3
                  # shares the highest count, pick index 3
                  if [ ${#gid_index_count[@]} -gt 1 ]; then
                      echo "[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}"
                      # If there's a tie, prefer index 3 as it's most common in SR-IOV setups
                      if [ -n "${gid_index_count['3']}" ] && [ "${gid_index_count['3']}" -eq "$max_count" ]; then
                          best_gid_index="3"
                          echo "[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)"
                      fi
                  fi

                  # Check if GID_INDEX is already set via environment variables
                  if [ -n "${NCCL_IB_GID_INDEX}" ]; then
                      echo "[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment"
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}
                      echo "[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX"
                  elif [ -n "$best_gid_index" ]; then
                      echo "[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)"

                      export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}
                      export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}
                      export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}

                      echo "[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX"
                  else
                      echo "[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA."
                  fi
              else
                  echo "[Infer RoCE] No active HCAs found, skipping GID_INDEX inference."
              fi
            fi

            # --disable-access-log-for-endpoints landed in vLLM 0.16.0 (vllm-project/vllm#30011).
            # Older versions still need the blanket --disable-uvicorn-access-log.
            ACCESS_LOG_ARGS="--disable-uvicorn-access-log"
            VLLM_VERSION=$(vllm --version 2>/dev/null | tail -1 | awk '{print $NF}')
            echo "[access-log-detect] vllm version='${VLLM_VERSION}'"
            # "0.16.0" sorting first (sort -V) means the detected version is >= 0.16.0
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.16.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.16.0" ]; then
              ACCESS_LOG_ARGS="--disable-access-log-for-endpoints /health,/metrics,/ping"
            fi
            echo "[access-log-detect] selected ACCESS_LOG_ARGS='${ACCESS_LOG_ARGS}'"

            # --shutdown-timeout landed in vLLM 0.18.0 (vllm-project/vllm#36666).
            SHUTDOWN_TIMEOUT_ARGS=""
            if [[ "$VLLM_VERSION" =~ ^[0-9]+\.[0-9]+ ]] && [ "$(printf '%s\n%s\n' "0.18.0" "${VLLM_VERSION}" | sort -V | head -1)" = "0.18.0" ]; then
              SHUTDOWN_TIMEOUT_ARGS="--shutdown-timeout 40"
            fi

            eval "exec vllm serve /mnt/models \
              --served-model-name "facebook/opt-125m" "publishers/kserve-ci-e2e-test/models/facebook/opt-125m" \
              --port 8000 \
              ${ACCESS_LOG_ARGS} \
              ${SHUTDOWN_TIMEOUT_ARGS} \
              --enable-ssl-refresh \
              --ssl-certfile /var/run/kserve/tls/tls.crt \
              --ssl-keyfile /var/run/kserve/tls/tls.key \
              ${VLLM_ADDITIONAL_ARGS} \
              $@"
          - --
          # Environment for the vLLM process. The root filesystem is read-only
          # (readOnlyRootFilesystem below), so HOME, TORCHINDUCTOR_CACHE_DIR and
          # HF_HUB_CACHE point into the /home, /tmp and /models volume mounts.
          env:
          - name: HOME
            value: /home
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
          - name: VLLM_CPU_KVCACHE_SPACE
            value: "1"
          - name: VLLM_ENABLE_V1_MULTIPROCESSING
            value: "0"
          - name: USER
            value: nonroot
          - name: TORCHINDUCTOR_CACHE_DIR
            value: /tmp/torchinductor-cache
          - name: HF_HUB_CACHE
            value: /models
          image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.19.0
          imagePullPolicy: IfNotPresent
          # preStop hook: sleep 15s before the container is stopped.
          # NOTE(review): presumably gives in-flight traffic time to drain; together
          # with the 40s --shutdown-timeout in the command this stays under the pod's
          # 60s terminationGracePeriodSeconds - confirm the intent.
          lifecycle:
            preStop:
              exec:
                command:
                - /bin/sleep
                - "15"
          # Liveness: HTTPS GET /health on port 8000 every 10s; fails after 3
          # consecutive misses.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 10
          name: main
          ports:
          - containerPort: 8000
            protocol: TCP
          # Readiness: same endpoint as liveness, but tolerates 60 consecutive
          # failures at a 10s period.
          readinessProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            limits:
              cpu: "2"
              memory: 7Gi
            requests:
              cpu: 200m
              memory: 2Gi
          # Hardened container: no privilege escalation, all capabilities dropped,
          # read-only root filesystem, non-root user, default seccomp profile.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup: 60 failures x 10s period allows roughly 600s for the server to
          # come up; each probe attempt times out after 1s.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /health
              port: 8000
              scheme: HTTPS
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # /var/run/kserve/tls and /mnt/models are mounted read-only; /mnt/models is
          # the same volume (kserve-provision-location) the storage-initializer init
          # container mounts writable.
          volumeMounts:
          - mountPath: /home
            name: home
          - mountPath: /tmp
            name: tmp-dir
          - mountPath: /dev/shm
            name: dshm
          - mountPath: /models
            name: model-cache
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /mnt/models
            name: kserve-provision-location
            readOnly: true
        dnsPolicy: ClusterFirst
        # storage-initializer runs before the main container with two arguments: a
        # source URI (hf://facebook/opt-125m) and a destination path (/mnt/models).
        # NOTE(review): presumably it downloads the model into the shared
        # kserve-provision-location volume - confirm against the initializer image.
        initContainers:
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials are read from the seaweedfs-s3-creds Secret.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # The S3 endpoint is plain HTTP with HTTPS and SSL verification turned off.
          # NOTE(review): looks like a CI-only setting - do not reuse outside tests.
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # Image is pinned by digest rather than by tag.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # Memory limit (24Gi) is far above the request (100Mi).
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # Writable mount of the volume the main container later mounts read-only.
          volumeMounts:
          - mountPath: /mnt/models
            name: kserve-provision-location
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        terminationGracePeriodSeconds: 60
        # Pod volumes: every volume except tls-certs is an emptyDir, so its
        # contents last only as long as the pod.
        volumes:
        - emptyDir: {}
          name: home
        # Memory-backed emptyDir capped at 1Gi; mounted at /dev/shm in the main
        # container.
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: dshm
        - emptyDir: {}
          name: model-cache
        - emptyDir: {}
          name: tmp-dir
        # TLS material from a Secret; defaultMode 420 is decimal for octal 0644.
        - name: tls-certs
          secret:
            defaultMode: 420
            secretName: router-with-refs-test-kserve-self-signed-certs
        # Shared between the storage-initializer init container and the main
        # container, both at /mnt/models.
        - emptyDir: {}
          name: kserve-provision-location
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
- apiVersion: apps/v1
  kind: ReplicaSet
  metadata:
    annotations:
      deployment.kubernetes.io/desired-replicas: "1"
      deployment.kubernetes.io/max-replicas: "1"
      deployment.kubernetes.io/revision: "1"
    creationTimestamp: "2026-06-15T06:30:12Z"
    generation: 1
    labels:
      app.kubernetes.io/component: llminferenceservice-router-scheduler
      app.kubernetes.io/name: router-with-refs-test
      app.kubernetes.io/part-of: llminferenceservice
      pod-template-hash: 7d4868d689
    managedFields:
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:deployment.kubernetes.io/desired-replicas: {}
            f:deployment.kubernetes.io/max-replicas: {}
            f:deployment.kubernetes.io/revision: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/component: {}
            f:app.kubernetes.io/name: {}
            f:app.kubernetes.io/part-of: {}
            f:pod-template-hash: {}
          f:ownerReferences:
            .: {}
            k:{"uid":"d5c4411e-483f-461f-922d-99d7ba299302"}: {}
        f:spec:
          f:replicas: {}
          f:selector: {}
          f:template:
            f:metadata:
              f:annotations:
                .: {}
                f:app.kubernetes.io/version: {}
                f:certificates.kserve.io/expiration-v2: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/part-of: {}
                f:pod-template-hash: {}
            f:spec:
              f:containers:
                k:{"name":"main"}:
                  .: {}
                  f:args: {}
                  f:command: {}
                  f:env:
                    .: {}
                    k:{"name":"SSL_CERT_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":5557,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9002,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9003,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                    k:{"containerPort":9090,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:grpc:
                      .: {}
                      f:port: {}
                      f:service: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/var/run/kserve/tls"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                k:{"name":"tokenizer"}:
                  .: {}
                  f:env:
                    .: {}
                    k:{"name":"TOKENIZERS_DIR"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:livenessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:name: {}
                  f:ports:
                    .: {}
                    k:{"containerPort":8082,"protocol":"TCP"}:
                      .: {}
                      f:containerPort: {}
                      f:name: {}
                      f:protocol: {}
                  f:readinessProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:resources:
                    .: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:securityContext:
                    .: {}
                    f:allowPrivilegeEscalation: {}
                    f:capabilities:
                      .: {}
                      f:drop: {}
                    f:readOnlyRootFilesystem: {}
                    f:runAsNonRoot: {}
                    f:seccompProfile:
                      .: {}
                      f:type: {}
                  f:startupProbe:
                    .: {}
                    f:failureThreshold: {}
                    f:httpGet:
                      .: {}
                      f:path: {}
                      f:port: {}
                      f:scheme: {}
                    f:initialDelaySeconds: {}
                    f:periodSeconds: {}
                    f:successThreshold: {}
                    f:timeoutSeconds: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/.cache"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/mnt/models/base"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                      f:readOnly: {}
                    k:{"mountPath":"/tmp"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                    k:{"mountPath":"/tmp/tokenizer"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
                  f:workingDir: {}
              f:dnsPolicy: {}
              f:initContainers:
                .: {}
                k:{"name":"storage-initializer"}:
                  .: {}
                  f:args: {}
                  f:env:
                    .: {}
                    k:{"name":"AWS_ACCESS_KEY_ID"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"AWS_CA_BUNDLE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_CA_BUNDLE_CONFIGMAP"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_ENDPOINT_URL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"AWS_SECRET_ACCESS_KEY"}:
                      .: {}
                      f:name: {}
                      f:valueFrom:
                        .: {}
                        f:secretKeyRef: {}
                    k:{"name":"HF_HUB_ENABLE_HF_TRANSFER"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_HIGH_PERFORMANCE"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"HF_XET_NUM_CONCURRENT_RANGE_GETS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_ENDPOINT"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_USE_HTTPS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"S3_VERIFY_SSL"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                    k:{"name":"STORAGE_ALLOW_PATTERNS"}:
                      .: {}
                      f:name: {}
                      f:value: {}
                  f:image: {}
                  f:imagePullPolicy: {}
                  f:name: {}
                  f:resources:
                    .: {}
                    f:limits:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                    f:requests:
                      .: {}
                      f:cpu: {}
                      f:memory: {}
                  f:terminationMessagePath: {}
                  f:terminationMessagePolicy: {}
                  f:volumeMounts:
                    .: {}
                    k:{"mountPath":"/mnt/models"}:
                      .: {}
                      f:mountPath: {}
                      f:name: {}
              f:restartPolicy: {}
              f:schedulerName: {}
              f:securityContext: {}
              f:serviceAccount: {}
              f:serviceAccountName: {}
              f:terminationGracePeriodSeconds: {}
              f:volumes:
                .: {}
                k:{"name":"kserve-provision-location"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tls-certs"}:
                  .: {}
                  f:name: {}
                  f:secret:
                    .: {}
                    f:defaultMode: {}
                    f:secretName: {}
                k:{"name":"tokenizer-cache"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-tmp"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
                k:{"name":"tokenizer-uds"}:
                  .: {}
                  f:emptyDir: {}
                  f:name: {}
      manager: kube-controller-manager
      operation: Update
      time: "2026-06-15T06:30:12Z"
    - apiVersion: apps/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          f:availableReplicas: {}
          f:fullyLabeledReplicas: {}
          f:observedGeneration: {}
          f:readyReplicas: {}
          f:replicas: {}
      manager: kube-controller-manager
      operation: Update
      subresource: status
      time: "2026-06-15T06:30:54Z"
    # Identity of this ReplicaSet. The name is the owning Deployment's name plus
    # the suffix 7d4868d689, which is also the pod-template-hash label value
    # used in the selector and pod template of this object.
    name: router-with-refs-test-kserve-router-scheduler-7d4868d689
    namespace: kserve-ci-e2e-test
    # Owned and controlled by a Deployment; blockOwnerDeletion is set on the
    # reference.
    ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: Deployment
      name: router-with-refs-test-kserve-router-scheduler
      uid: d5c4411e-483f-461f-922d-99d7ba299302
    resourceVersion: "44797"
    uid: dfa69409-5eec-4453-854d-0e1a9b183345
  # Desired state: one replica, selected by four labels. The same four
  # key/value pairs appear in template.metadata.labels of this spec.
  spec:
    replicas: 1
    selector:
      matchLabels:
        app.kubernetes.io/component: llminferenceservice-router-scheduler
        app.kubernetes.io/name: router-with-refs-test
        app.kubernetes.io/part-of: llminferenceservice
        pod-template-hash: 7d4868d689
    # Pod template metadata: two annotations plus the labels matched by the
    # selector.
    template:
      metadata:
        annotations:
          # NOTE(review): this annotation reads 0.7.0 while both container
          # images in this pod spec are tagged v0.7.1 - confirm whether the
          # difference is intended.
          app.kubernetes.io/version: 0.7.0
          certificates.kserve.io/expiration-v2: "true"
        labels:
          app.kubernetes.io/component: llminferenceservice-router-scheduler
          app.kubernetes.io/name: router-with-refs-test
          app.kubernetes.io/part-of: llminferenceservice
          pod-template-hash: 7d4868d689
      # Pod spec: two containers (main, tokenizer), one init container
      # (storage-initializer) and five volumes.
      spec:
        containers:
        # Container "main": runs /app/epp from the llm-d-inference-scheduler
        # image. `args` holds --config-text followed by an inline
        # EndpointPickerConfig document (four plugins, one scheduling profile
        # named "default" with weights 2 and 3 on the two scorers); `command`
        # holds the binary path and all remaining flags.
        - args:
          - --config-text
          - |
            apiVersion: inference.networking.x-k8s.io/v1alpha1
            kind: EndpointPickerConfig
            plugins:
            - type: single-profile-handler
            - type: queue-scorer
            - type: prefix-cache-scorer
            - type: max-score-picker
            schedulingProfiles:
            - name: default
              plugins:
              - pluginRef: queue-scorer
                weight: 2
              - pluginRef: prefix-cache-scorer
                weight: 3
              - pluginRef: max-score-picker
          command:
          - /app/epp
          - --pool-name
          - router-with-refs-test-inference-pool
          - --pool-namespace
          - kserve-ci-e2e-test
          - --zap-encoder
          - json
          - --grpc-port
          - "9002"
          - --grpc-health-port
          - "9003"
          - --enable-cert-reload=true
          - --secure-serving=true
          - --model-server-metrics-scheme=https
          - --cert-path=/var/run/kserve/tls
          # SSL_CERT_DIR is a colon-separated directory list; its first entry
          # is the path where the tls-certs volume is mounted below.
          env:
          - name: SSL_CERT_DIR
            value: /var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs
          image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.7.1
          imagePullPolicy: IfNotPresent
          # gRPC probe on 9003 (the --grpc-health-port value), service name
          # "liveness"; first probe after 5s.
          livenessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: liveness
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          name: main
          # 9002 and 9003 match --grpc-port and --grpc-health-port in `command`;
          # 9090 (metrics) and 5557 (zmq) are not set by any flag shown there.
          ports:
          - containerPort: 9002
            name: grpc
            protocol: TCP
          - containerPort: 9003
            name: grpc-health
            protocol: TCP
          - containerPort: 9090
            name: metrics
            protocol: TCP
          - containerPort: 5557
            name: zmq
            protocol: TCP
          # Same gRPC port as the liveness probe but service name "readiness"
          # and a longer initial delay (30s versus 5s).
          readinessProbe:
            failureThreshold: 3
            grpc:
              port: 9003
              service: readiness
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          # Only requests are declared; this container sets no limits.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Restricted context: no privilege escalation, all capabilities
          # dropped, read-only root filesystem, non-root, RuntimeDefault seccomp.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # tls-certs is mounted read-only at the directory given to
          # --cert-path. tokenizer-uds is mounted at /tmp/tokenizer, the same
          # path at which the tokenizer container mounts that volume.
          volumeMounts:
          - mountPath: /var/run/kserve/tls
            name: tls-certs
            readOnly: true
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
        # Container "tokenizer": runs the llm-d-uds-tokenizer image with no
        # command/args override. It shares the tokenizer-uds volume with "main"
        # at /tmp/tokenizer (presumably for a Unix domain socket, going by the
        # volume and image names - confirm).
        - env:
          # Same path as workingDir at the end of this container definition.
          - name: TOKENIZERS_DIR
            value: /mnt/models
          image: ghcr.io/llm-d/llm-d-uds-tokenizer:v0.7.1
          imagePullPolicy: IfNotPresent
          # All three probes of this container issue HTTP GET /healthz on 8082.
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 15
            successThreshold: 1
            timeoutSeconds: 5
          name: tokenizer
          ports:
          - containerPort: 8082
            name: health
            protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          # Only requests are declared; this container sets no limits.
          resources:
            requests:
              cpu: 256m
              memory: 500Mi
          # Identical to the securityContext of the "main" container.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          # Startup budget: failureThreshold 60 at periodSeconds 10, starting
          # 5s after the container starts.
          startupProbe:
            failureThreshold: 60
            httpGet:
              path: /healthz
              port: 8082
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # The root filesystem is read-only (see securityContext), so /tmp and
          # /.cache are backed by emptyDir volumes. /tmp/tokenizer is a separate
          # volume nested under the /tmp mount.
          # NOTE(review): kserve-provision-location is mounted read-only at
          # /mnt/models/base here, whereas the storage-initializer init
          # container mounts the same volume at /mnt/models - confirm the
          # "base" subdirectory is what the tokenizer expects.
          volumeMounts:
          - mountPath: /tmp
            name: tokenizer-tmp
          - mountPath: /.cache
            name: tokenizer-cache
          - mountPath: /tmp/tokenizer
            name: tokenizer-uds
          - mountPath: /mnt/models/base
            name: kserve-provision-location
            readOnly: true
          workingDir: /mnt/models
        dnsPolicy: ClusterFirst
        initContainers:
        # Init container "storage-initializer". Its two args are a model URI
        # (hf://facebook/opt-125m) and a filesystem path (/mnt/models),
        # presumably source then destination - confirm against the image.
        # NOTE(review): unlike "main" and "tokenizer", this container declares
        # no securityContext, and the pod-level securityContext is empty.
        - args:
          - hf://facebook/opt-125m
          - /mnt/models
          env:
          # S3 credentials are read from the seaweedfs-s3-creds Secret.
          - name: AWS_ACCESS_KEY_ID
            valueFrom:
              secretKeyRef:
                key: AWS_ACCESS_KEY_ID
                name: seaweedfs-s3-creds
          - name: AWS_SECRET_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                key: AWS_SECRET_ACCESS_KEY
                name: seaweedfs-s3-creds
          # The S3 endpoint is plain HTTP (S3_USE_HTTPS "0", http:// URL) and
          # S3_VERIFY_SSL is "0".
          - name: S3_USE_HTTPS
            value: "0"
          - name: S3_ENDPOINT
            value: s3-service.kserve:8333
          - name: AWS_ENDPOINT_URL
            value: http://s3-service.kserve:8333
          - name: S3_VERIFY_SSL
            value: "0"
          # NOTE(review): AWS_CA_BUNDLE points at /etc/ssl/custom-certs, but
          # this container's only volumeMount is /mnt/models and no ConfigMap
          # volume is defined in this pod spec - confirm the file exists.
          - name: AWS_CA_BUNDLE
            value: /etc/ssl/custom-certs/cabundle.crt
          - name: AWS_CA_BUNDLE_CONFIGMAP
            value: odh-kserve-custom-ca-bundle
          - name: HF_HUB_ENABLE_HF_TRANSFER
            value: "1"
          - name: HF_XET_HIGH_PERFORMANCE
            value: "1"
          - name: HF_XET_NUM_CONCURRENT_RANGE_GETS
            value: "8"
          # The value is a JSON array of seven file names encoded as one
          # string; the single-quoted scalar is folded across two lines.
          - name: STORAGE_ALLOW_PATTERNS
            value: '["tokenizer.json", "tokenizer_config.json", "special_tokens_map.json",
              "vocab.json", "merges.txt", "config.json", "generation_config.json"]'
          # Pinned by sha256 digest, unlike the two tag-pinned containers.
          image: quay.io/opendatahub/kserve-storage-initializer@sha256:ba8edcbfb3f9312d158be16483785d7654e60c7090f262c42214fd2b29effada
          imagePullPolicy: IfNotPresent
          name: storage-initializer
          # The only container in this pod that declares limits.
          resources:
            limits:
              cpu: "1"
              memory: 24Gi
            requests:
              cpu: 100m
              memory: 100Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          # Mounted without readOnly, at the path given as the second arg.
          volumeMounts:
          - mountPath: /mnt/models
            name: kserve-provision-location
        # Pod-level settings. securityContext is empty here; the two app
        # containers carry their own container-level securityContext.
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext: {}
        # serviceAccount and serviceAccountName carry the same value.
        serviceAccount: router-with-refs-test-epp-sa
        serviceAccountName: router-with-refs-test-epp-sa
        terminationGracePeriodSeconds: 30
        # One Secret-backed volume (tls-certs) and four emptyDir volumes.
        volumes:
        - name: tls-certs
          secret:
            # 420 decimal equals 0644 octal.
            defaultMode: 420
            secretName: router-with-refs-test-kserve-self-signed-certs
        - emptyDir: {}
          name: tokenizer-uds
        - emptyDir: {}
          name: tokenizer-tmp
        - emptyDir: {}
          name: tokenizer-cache
        - emptyDir: {}
          name: kserve-provision-location
  # Reported state: every replica counter is 1, matching spec.replicas, and
  # observedGeneration is 1.
  status:
    availableReplicas: 1
    fullyLabeledReplicas: 1
    observedGeneration: 1
    readyReplicas: 1
    replicas: 1
# List-level fields: the type of the enclosing list object and the
# resourceVersion recorded for the list as a whole.
kind: ReplicaSetList
metadata:
  resourceVersion: "97583"