--- apiVersion: v1 items: - apiVersion: v1 data: _example: |- ################################ # # # EXAMPLE CONFIGURATION # # # ################################ # This block is not actually functional configuration, # but serves to illustrate the available configuration # options and document them in a way that is accessible # to users that `kubectl edit` this config map. # # These sample configuration options may be copied out of # this example block and unindented to be in the data block # to actually change the configuration. # ====================================== EXPLAINERS CONFIGURATION ====================================== # Example explainers: |- { "art": { "image" : "kserve/art-explainer", "defaultImageVersion": "latest" } } # Art Explainer runtime configuration explainers: |- { # Art explainer runtime configuration "art": { # image contains the default Art explainer serving runtime image uri. "image" : "kserve/art-explainer", # defaultImageVersion contains the Art explainer serving runtime default image version. "defaultImageVersion": "latest" } } # ====================================== ISVC CONFIGURATION ====================================== # Example - setting custom annotation inferenceService: |- { "serviceAnnotationDisallowedList": [ "my.custom.annotation/1" ], "serviceLabelDisallowedList": [ "my.custom.label.1" ] } # Example - setting custom annotation inferenceService: |- { # ServiceAnnotationDisallowedList is a list of annotations that are not allowed to be propagated to Knative # revisions, which prevents the reconciliation loop from being triggered if the annotations # configured here are used. # Default values are: # "autoscaling.knative.dev/min-scale", # "autoscaling.knative.dev/max-scale", # "internal.serving.kserve.io/storage-initializer-sourceuri", # "kubectl.kubernetes.io/last-applied-configuration", # "modelFormat" # Any new value will be appended to the list. 
"serviceAnnotationDisallowedList": [ "my.custom.annotation/1" ], # ServiceLabelDisallowedList is a list of labels that are not allowed to be propagated to Knative revisions # which prevents the reconciliation loop to be triggered if the labels is configured here are used. "serviceLabelDisallowedList": [ "my.custom.label.1" ] } # Example - setting custom resource inferenceService: |- { "resource": { "cpuLimit": "1", "memoryLimit": "2Gi", "cpuRequest": "1", "memoryRequest": "2Gi" } } # Example - setting custom resource inferenceService: |- { # resource contains the default resource configuration for the inference service. # you can override this configuration by specifying the resources in the inference service yaml. # If you want to unbound the resource (limits and requests), you can set the value to null or "" # or just remove the specific field from the config. "resource": { # cpuLimit is the limits.cpu to set for the inference service. "cpuLimit": "1", # memoryLimit is the limits.memory to set for the inference service. "memoryLimit": "2Gi", # cpuRequest is the requests.cpu to set for the inference service. "cpuRequest": "1", # memoryRequest is the requests.memory to set for the inference service. "memoryRequest": "2Gi" } } # ====================================== MultiNode CONFIGURATION ====================================== # Example multiNode: |- { "customGPUResourceTypeList": [ "custom.com/gpu" ] } # Example of multinode configuration multiNode: |- { # CustomGPUResourceTypeList is a list of custom GPU resource types intended to identify the GPU type of a resource, # not to restrict the user from using a specific GPU type. # The MultiNode runtime pod will dynamically add GPU resources based on the registered GPU types. 
"customGPUResourceTypeList": [ "custom.com/gpu" ] } # ====================================== OTelCollector CONFIGURATION ====================================== # Example opentelemetryCollector: |- { # scrapeInterval is the interval at which the OpenTelemetry Collector will scrape the metrics. "scrapeInterval": "5s", # metricScalerEndpoint is the endpoint from which the KEDA's ScaledObject will scrape the metrics. "metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318", # metricReceiverEndpoint is the endpoint from which the OpenTelemetry Collector will scrape the metrics. "metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317" } # ====================================== AUTOSCALER CONFIGURATION ====================================== # Example autoscaler: |- { # scaleUpStabilizationWindowSeconds is the stabilization window in seconds for scale up. "scaleUpStabilizationWindowSeconds": "0", # scaleDownStabilizationWindowSeconds is the stabilization window in seconds for scale down. "scaleDownStabilizationWindowSeconds": "300" } # ====================================== STORAGE INITIALIZER CONFIGURATION ====================================== # Example storageInitializer: |- { "image" : "kserve/storage-initializer:latest", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", "caBundleConfigMapName": "", "caBundleVolumeMountPath": "/etc/ssl/custom-certs", "enableModelcar": false, "cpuModelcar": "10m", "memoryModelcar": "15Mi" } storageInitializer: |- { # image contains the default storage initializer image uri. "image" : "kserve/storage-initializer:latest", # memoryRequest is the requests.memory to set for the storage initializer init container. "memoryRequest": "100Mi", # memoryLimit is the limits.memory to set for the storage initializer init container. "memoryLimit": "1Gi", # cpuRequest is the requests.cpu to set for the storage initializer init container. 
"cpuRequest": "100m", # cpuLimit is the limits.cpu to set for the storage initializer init container. "cpuLimit": "1", # caBundleConfigMapName is the ConfigMap will be copied to a user namespace for the storage initializer init container. "caBundleConfigMapName": "", # caBundleVolumeMountPath is the mount point for the configmap set by caBundleConfigMapName for the storage initializer init container. "caBundleVolumeMountPath": "/etc/ssl/custom-certs", # enableModelcar enabled allows you to directly access an OCI container image by # using a source URL with an "oci://" schema. "enableModelcar": false, # cpuModelcar is the cpu request and limit that is used for the passive modelcar container. It can be # set very low, but should be allowed by any Kubernetes LimitRange that might apply. "cpuModelcar": "10m", # cpuModelcar is the memory request and limit that is used for the passive modelcar container. It can be # set very low, but should be allowed by any Kubernetes LimitRange that might apply. "memoryModelcar": "15Mi", # uidModelcar is the UID under with which the modelcar process and the main container is running. # Some Kubernetes clusters might require this to be root (0). If not set the user id is left untouched (default) "uidModelcar": 10 } # ====================================== CREDENTIALS ====================================== # Example credentials: |- { "storageSpecSecretName": "storage-config", "storageSecretNameAnnotation": "serving.kserve.io/storageSecretName", "gcs": { "gcsCredentialFileName": "gcloud-application-credentials.json" }, "s3": { "s3AccessKeyIDName": "AWS_ACCESS_KEY_ID", "s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY", "s3Endpoint": "", "s3UseHttps": "", "s3Region": "", "s3VerifySSL": "", "s3UseVirtualBucket": "", "s3UseAccelerate": "", "s3UseAnonymousCredential": "", "s3CABundleConfigMap": "", "s3CABundle": "" } } # This is a global configuration used for downloading models from the cloud storage. 
# You can override this configuration by specifying the annotations on service account or static secret. # https://kserve.github.io/website/master/modelserving/storage/s3/s3/ # For a quick reference about AWS ENV variables: # AWS Cli: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html # Boto: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables # # The `s3AccessKeyIDName` and `s3SecretAccessKeyName` fields are only used from this configmap when static credentials (IAM User Access Key Secret) # are used as the authentication method for AWS S3. # The rest of the fields are used in both authentication methods (IAM Role for Service Account & IAM User Access Key Secret) if a non-empty value is provided. credentials: |- { # storageSpecSecretName contains the secret name which has the credentials for downloading the model. # This option is used when specifying the storage spec on isvc yaml. "storageSpecSecretName": "storage-config", # The annotation can be specified on isvc yaml to allow overriding with the secret name reference from the annotation value. # When using storageUri the order of the precedence is: secret name reference annotation > secret name references from service account # When using storageSpec the order of the precedence is: secret name reference annotation > storageSpecSecretName in configmap # Configuration for google cloud storage "gcs": { # gcsCredentialFileName specifies the filename of the gcs credential "gcsCredentialFileName": "gcloud-application-credentials.json" }, # Configuration for aws s3 storage. This add the corresponding environmental variables to the storage initializer init container. 
# For more info on s3 storage see https://kserve.github.io/website/master/modelserving/storage/s3/s3/ "s3": { # s3AccessKeyIDName specifies the s3 access key id name "s3AccessKeyIDName": "AWS_ACCESS_KEY_ID", # s3SecretAccessKeyName specifies the s3 secret access key name "s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY", # s3Endpoint specifies the s3 endpoint "s3Endpoint": "", # s3UseHttps controls whether to use secure https or unsecure http to download models. # Allowed values are 0 and 1. "s3UseHttps": "", # s3Region specifies the region of the bucket. "s3Region": "", # s3VerifySSL controls whether to verify the tls/ssl certificate. "s3VerifySSL": "", # s3UseVirtualBucket configures whether it is a virtual bucket or not. "s3UseVirtualBucket": "", # s3UseAccelerate configures whether to use transfer acceleration. "s3UseAccelerate": "", # s3UseAnonymousCredential configures whether to use anonymous credentials to download the model or not. "s3UseAnonymousCredential": "", # s3CABundleConfigMap specifies the mounted CA bundle config map name. "s3CABundleConfigMap": "", # s3CABundle specifies the full path (mount path + file name) for the mounted config map data when used with a configured CA bundle config map. # s3CABundle specifies the path to a certificate bundle to use for HTTPS certificate validation when used absent of a configured CA bundle config map. 
"s3CABundle": "" } } # ====================================== INGRESS CONFIGURATION ====================================== # Example ingress: |- { "enableGatewayApi": false, "kserveIngressGateway": "kserve/kserve-ingress-gateway", "ingressGateway" : "knative-serving/knative-ingress-gateway", "localGateway" : "knative-serving/knative-local-gateway", "localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local", "ingressDomain" : "example.com", "additionalIngressDomains": ["additional-example.com", "additional-example-1.com"], "ingressClassName" : "istio", "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", "urlScheme": "http", "disableIstioVirtualHost": false, "disableIngressCreation": false, "disableHTTPRouteTimeout": false } ingress: |- { # enableGatewayApi specifies whether to use Gateway API instead of Ingress to serve external traffic. "enableGatewayApi": false, # KServe implements [Gateway API](https://gateway-api.sigs.k8s.io/) to serve external traffic. # By default, KServe configures a default gateway to serve external traffic. # But, KServe can be configured to use a custom gateway by modifying this configuration. # The gateway should be specified in format namespace/name # NOTE: This configuration only applicable for raw deployment. "kserveIngressGateway": "kserve/kserve-ingress-gateway", # ingressGateway specifies the ingress gateway to serve external traffic. # The gateway should be specified in format namespace/name # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer. "ingressGateway" : "knative-serving/knative-ingress-gateway", # knativeLocalGatewayService specifies the hostname of the Knative's local gateway service. # The default KServe configurations are re-using the Istio local gateways for Knative. In this case, this # knativeLocalGatewayService field can be left unset. When unset, the value of "localGatewayService" will be used. 
# However, sometimes it may be better to have local gateways specifically for KServe (e.g. when enabling strict mTLS in Istio). # Under such setups where KServe is needed to have its own local gateways, the values of the "localGateway" and # "localGatewayService" should point to the KServe local gateways. Then, this knativeLocalGatewayService field # should point to the Knative's local gateway service. # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer. "knativeLocalGatewayService": "", # localGateway specifies the gateway which handles the network traffic within the cluster. # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer. "localGateway" : "knative-serving/knative-local-gateway", # localGatewayService specifies the hostname of the local gateway service. # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer. "localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local", # ingressDomain specifies the domain name which is used for creating the url. # If ingressDomain is empty then example.com is used as default domain. # NOTE: This configuration only applicable for raw deployment. "ingressDomain" : "example.com", # additionalIngressDomains specifies the additional domain names which are used for creating the url. "additionalIngressDomains": ["additional-example.com", "additional-example-1.com"], # ingressClassName specifies the ingress controller to use for ingress traffic. # This is optional and if omitted the default ingress in the cluster is used. # https://kubernetes.io/docs/concepts/services-networking/ingress/#default-ingress-class # NOTE: This configuration only applicable for raw deployment. 
"ingressClassName" : "istio", # domainTemplate specifies the template for generating domain/url for each inference service by combining variable from: # Name of the inference service ( {{ .Name}} ) # Namespace of the inference service ( {{ .Namespace }} ) # Annotation of the inference service ( {{ .Annotations.key }} ) # Label of the inference service ( {{ .Labels.key }} ) # IngressDomain ( {{ .IngressDomain }} ) # If domain template is empty the default template {{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }} is used. # NOTE: This configuration only applicable for raw deployment. "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", # urlScheme specifies the url scheme to use for inference service and inference graph. # If urlScheme is empty then by default http is used. "urlScheme": "http", # disableIstioVirtualHost controls whether to use istio as network layer. # By default istio is used as the network layer. When DisableIstioVirtualHost is true, KServe does not # create the top level virtual service thus Istio is no longer required for serverless mode. # By setting this field to true, user can use other networking layers supported by knative. # For more info https://github.com/kserve/kserve/pull/2380, https://kserve.github.io/website/master/admin/serverless/kourier_networking/. # NOTE: This configuration is only applicable to serverless deployment. "disableIstioVirtualHost": false, # disableIngressCreation controls whether to disable ingress creation for raw deployment mode. "disableIngressCreation": false, # disableHTTPRouteTimeout controls whether to omit the timeout field from HTTPRoute rules. # Set to true for Gateway controllers (e.g. GKE Gateway) that do not support the optional timeouts field. "disableHTTPRouteTimeout": false, # pathTemplate specifies the template for generating path based url for each inference service. # The following variables can be used in the template for generating url. 
# Name of the inference service ( {{ .Name}} ) # Namespace of the inference service ( {{ .Namespace }} ) # For more info https://github.com/kserve/kserve/issues/2257. # NOTE: This configuration only applicable to serverless deployment. "pathTemplate": "/serving/{{ .Namespace }}/{{ .Name }}" } # ====================================== LOGGER CONFIGURATION ====================================== # Example logger: |- { "image" : "kserve/agent:latest", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", "defaultUrl": "http://default-broker" } logger: |- { # image contains the default logger image uri. "image" : "kserve/agent:latest", # memoryRequest is the requests.memory to set for the logger container. "memoryRequest": "100Mi", # memoryLimit is the limits.memory to set for the logger container. "memoryLimit": "1Gi", # cpuRequest is the requests.cpu to set for the logger container. "cpuRequest": "100m", # cpuLimit is the limits.cpu to set for the logger container. "cpuLimit": "1", # defaultUrl specifies the default logger url. If logger is not specified in the resource this url is used. "defaultUrl": "http://default-broker" } # ====================================== BATCHER CONFIGURATION ====================================== # Example batcher: |- { "image" : "kserve/agent:latest", "memoryRequest": "1Gi", "memoryLimit": "1Gi", "cpuRequest": "1", "cpuLimit": "1", "maxBatchSize": "32", "maxLatency": "5000" } batcher: |- { # image contains the default batcher image uri. "image" : "kserve/agent:latest", # memoryRequest is the requests.memory to set for the batcher container. "memoryRequest": "1Gi", # memoryLimit is the limits.memory to set for the batcher container. "memoryLimit": "1Gi", # cpuRequest is the requests.cpu to set for the batcher container. "cpuRequest": "1", # cpuLimit is the limits.cpu to set for the batcher container. "cpuLimit": "1", # maxBatchSize is the default maximum batch size for batcher. 
"maxBatchSize": "32", # maxLatency is the default maximum latency in milliseconds for batcher to wait and collect the batch. "maxLatency": "5000" } # ====================================== AGENT CONFIGURATION ====================================== # Example agent: |- { "image" : "kserve/agent:latest", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1" } agent: |- { # image contains the default agent image uri. "image" : "kserve/agent:latest", # memoryRequest is the requests.memory to set for the agent container. "memoryRequest": "100Mi", # memoryLimit is the limits.memory to set for the agent container. "memoryLimit": "1Gi", # cpuRequest is the requests.cpu to set for the agent container. "cpuRequest": "100m", # cpuLimit is the limits.cpu to set for the agent container. "cpuLimit": "1" } # ====================================== ROUTER CONFIGURATION ====================================== # Example router: |- { "image" : "kserve/router:latest", "memoryRequest": "100Mi", "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", "headers": { "propagate": [] }, "imagePullPolicy": "IfNotPresent", "imagePullSecrets": ["docker-secret"] } # router is the implementation of inference graph. router: |- { # image contains the default router image uri. "image" : "kserve/router:latest", # memoryRequest is the requests.memory to set for the router container. "memoryRequest": "100Mi", # memoryLimit is the limits.memory to set for the router container. "memoryLimit": "1Gi", # cpuRequest is the requests.cpu to set for the router container. "cpuRequest": "100m", # cpuLimit is the limits.cpu to set for the router container. "cpuLimit": "1", # Propagate the specified headers to all the steps specified in an InferenceGraph. # You can either specify the exact header names or use [Golang supported regex patterns] # (https://pkg.go.dev/regexp/syntax@go1.21.3#hdr-Syntax) to propagate multiple headers. 
"headers": { "propagate": [ "Authorization", "Test-Header-*", "*Trace-Id*" ] } # imagePullPolicy specifies when the router image should be pulled from registry. "imagePullPolicy": "IfNotPresent", # # imagePullSecrets specifies the list of secrets to be used for pulling the router image from registry. # https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ "imagePullSecrets": ["docker-secret"] } # ====================================== DEPLOYMENT CONFIGURATION ====================================== # Example deploy: |- { "defaultDeploymentMode": "Serverless", "deploymentRolloutStrategy": { "defaultRollout": { "maxSurge": "1", "maxUnavailable": "1" } } } deploy: |- { # defaultDeploymentMode specifies the default deployment mode of the kserve. The supported values are # Standard and Knative. Users can override the deployment mode at service level # by adding the annotation serving.kserve.io/deploymentMode. # "defaultDeploymentMode": "Standard", # deploymentRolloutStrategy specifies the default rollout strategy for the Standard deployment mode # "deploymentRolloutStrategy": { # defaultRollout specifies the default rollout configuration using Kubernetes deployment strategy # "defaultRollout": { # maxSurge specifies the maximum number of pods that can be created above the desired replica count # Can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%) # "maxSurge": "1", # maxUnavailable specifies the maximum number of pods that can be unavailable during the update # Can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%) # "maxUnavailable": "1" # } # } } # ====================================== SERVICE CONFIGURATION ====================================== # Example service: |- { "serviceClusterIPNone": false } service: |- { # ServiceClusterIPNone is a boolean flag to indicate if the service should have a clusterIP set to None. 
# If the DeploymentMode is Raw, the default value for ServiceClusterIPNone if not set is false # "serviceClusterIPNone": false } # ====================================== METRICS CONFIGURATION ====================================== # Example metricsAggregator: |- { "enableMetricAggregation": "false", "enablePrometheusScraping" : "false" } # For more info see https://github.com/kserve/kserve/blob/master/qpext/README.md metricsAggregator: |- { # enableMetricAggregation configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every # service with the specified boolean value. If true enables metric aggregation in queue-proxy by setting env vars in the queue proxy container # to configure scraping ports. "enableMetricAggregation": "false", # enablePrometheusScraping configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every # service with the specified boolean value. If true, prometheus annotations are added to the pod. If serving.kserve.io/enable-metric-aggregation is false, # the prometheus port is set with the default prometheus scraping port 9090, otherwise the prometheus port annotation is set with the metric aggregation port. "enablePrometheusScraping" : "false" } # ====================================== LOCALMODEL CONFIGURATION ====================================== # Example localModel: |- { "enabled": false, # jobNamespace specifies the namespace where the download job will be created. "jobNamespace": "kserve-localmodel-jobs", # defaultJobImage specifies the default image used for the download job. "defaultJobImage" : "kserve/storage-initializer:latest", # Kubernetes modifies the filesystem group ID on the attached volume. "fsGroup": 1000, # TTL for the download job after it is finished. 
"jobTTLSecondsAfterFinished": 3600, # The frequency at which the local model agent reconciles the local models # This is to detect if models are missing from local disk "reconcilationFrequencyInSecs": 60, # This is to disable localmodel pv and pvc management for namespaces without isvcs "disableVolumeManagement": false } agent: |- { "cpuLimit": "1", "cpuRequest": "100m", "image": "quay.io/opendatahub/kserve-agent@sha256:f1e6c59752bf9c5d5ef383777636f8bf6f11e2b73d04072559a0fe8e69dc54d3", "memoryLimit": "1Gi", "memoryRequest": "100Mi" } autoscaler: |- { "scaleUpStabilizationWindowSeconds": "0", "scaleDownStabilizationWindowSeconds": "300" } autoscaling-wva-controller-config: |- { "prometheus": { "url": "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091", "authModes": "bearer", "triggerAuthName": "ai-inference-keda-thanos", "triggerAuthKind": "ClusterTriggerAuthentication" } } batcher: |- { "cpuLimit": "1", "cpuRequest": "1", "image": "quay.io/opendatahub/kserve-agent@sha256:f1e6c59752bf9c5d5ef383777636f8bf6f11e2b73d04072559a0fe8e69dc54d3", "memoryLimit": "1Gi", "memoryRequest": "1Gi" } credentials: |- { "storageSpecSecretName": "storage-config", "storageSecretNameAnnotation": "serving.kserve.io/storageSecretName", "gcs": { "gcsCredentialFileName": "gcloud-application-credentials.json" }, "s3": { "s3AccessKeyIDName": "AWS_ACCESS_KEY_ID", "s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY", "s3Endpoint": "", "s3UseHttps": "", "s3Region": "", "s3VerifySSL": "", "s3UseVirtualBucket": "", "s3UseAccelerate": "", "s3UseAnonymousCredential": "", "s3CABundleConfigMap": "odh-kserve-custom-ca-bundle", "s3CABundle": "/etc/ssl/custom-certs/cabundle.crt" } } deploy: |- { "defaultDeploymentMode": "RawDeployment" } explainers: '{}' inferenceService: |- { "serviceAnnotationDisallowedList": [ "autoscaling.knative.dev/min-scale", "autoscaling.knative.dev/max-scale", "internal.serving.kserve.io/storage-initializer-sourceuri", 
"kubectl.kubernetes.io/last-applied-configuration", "security.opendatahub.io/enable-auth", "networking.knative.dev/visibility", "haproxy.router.openshift.io/timeout", "opendatahub.io/hardware-profile-name", "opendatahub.io/hardware-profile-namespace" ] } ingress: |- { "enableGatewayApi": false, "kserveIngressGateway": "openshift-ingress/openshift-ai-inference", "enableLLMInferenceServiceTLS": true, "ingressGateway": "knative-serving/knative-ingress-gateway", "knativeLocalGatewayService": "knative-local-gateway.istio-system.svc.cluster.local", "ingressService": "istio-ingressgateway.istio-system.svc.cluster.local", "localGateway": "istio-system/kserve-local-gateway", "localGatewayService": "kserve-local-gateway.istio-system.svc.cluster.local", "ingressDomain": "apps.685960d6-3e23-42d6-b160-ce69ad8f3dce.prod.konfluxeaas.com", "ingressClassName": "openshift-default", "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", "urlScheme": "http", "disableIstioVirtualHost": false, "disableIngressCreation": true } localModel: |- { "enabled": false, "jobNamespace": "opendatahub", "defaultJobImage" : "REPLACE_IMAGE", "fsGroup": 1000, "localModelAgentImage": "REPLACE_IMAGE", "localModelAgentCpuRequest": "100m", "localModelAgentMemoryRequest": "200Mi", "localModelAgentCpuLimit": "100m", "localModelAgentMemoryLimit": "300Mi" } logger: |- { "cpuLimit": "1", "cpuRequest": "100m", "defaultUrl": "http://default-broker", "image": "quay.io/opendatahub/kserve-agent@sha256:f1e6c59752bf9c5d5ef383777636f8bf6f11e2b73d04072559a0fe8e69dc54d3", "memoryLimit": "1Gi", "memoryRequest": "100Mi" } metricsAggregator: |- { "enableMetricAggregation": "false", "enablePrometheusScraping" : "false" } oauthProxy: |- { "cpuLimit": "200m", "cpuRequest": "100m", "image": "quay.io/opendatahub/odh-kube-auth-proxy@sha256:dcb09fbabd8811f0956ef612a0c9ddd5236804b9bd6548a0647d2b531c9d01b3", "memoryLimit": "128Mi", "memoryRequest": "64Mi" } openshiftConfig: |- { "modelcachePermissionFixImage": 
"REPLACE_IMAGE" } opentelemetryCollector: |- { "scrapeInterval": "5s", "metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317", "metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318", "resource": { "cpuLimit": "1", "memoryLimit": "2Gi", "cpuRequest": "200m", "memoryRequest": "512Mi" } } router: |- { "cpuLimit": "1", "cpuRequest": "100m", "headers": { "propagate": [ "Authorization" ] }, "image": "quay.io/opendatahub/kserve-router@sha256:66a46d5a6623872616c85560bba9e0c6b4eaf29ca86ba636e84212e5017220b2", "memoryLimit": "1Gi", "memoryRequest": "100Mi" } security: |- { "autoMountServiceAccountToken": false } service: |- { "serviceClusterIPNone": false } storageInitializer: |- { "cpuLimit": "1", "cpuModelcar": "10m", "cpuRequest": "100m", "enableModelcar": true, "image": "quay.io/opendatahub/kserve-storage-initializer@sha256:1e3c9913c801ea01a5ba1aef752627a7bb4ea1c0e22f7d100bc3ea27e07d693d", "memoryLimit": "24Gi", "memoryModelcar": "15Mi", "memoryRequest": "100Mi" } kind: ConfigMap metadata: creationTimestamp: "2026-04-23T16:40:59Z" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: f:_example: {} f:agent: {} f:autoscaler: {} f:autoscaling-wva-controller-config: {} f:batcher: {} f:credentials: {} f:deploy: {} f:explainers: {} f:inferenceService: {} f:localModel: {} f:logger: {} f:metricsAggregator: {} f:oauthProxy: {} f:openshiftConfig: {} f:opentelemetryCollector: {} f:router: {} f:security: {} f:service: {} f:storageInitializer: {} manager: kubectl operation: Apply time: "2026-04-23T16:40:59Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: f:ingress: {} manager: kubectl-patch operation: Update time: "2026-04-23T16:41:38Z" name: inferenceservice-config namespace: kserve resourceVersion: "12189" uid: f74b6423-e92d-4deb-8cca-8172460c971e - apiVersion: v1 data: kserve-agent: quay.io/opendatahub/kserve-agent@sha256:f1e6c59752bf9c5d5ef383777636f8bf6f11e2b73d04072559a0fe8e69dc54d3 kserve-controller: 
quay.io/opendatahub/kserve-controller@sha256:4180122cff2b4c85fb1ca779f3efa0423e8883200e2f4345be6b5bc0708b1ad0 kserve-llm-d: registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 kserve-llm-d-amd-rocm: registry.redhat.io/rhaiis/vllm-rocm-rhel9@sha256:d9a48add238cc095fa43eeee17c8c4d104de60c4dc623e0bc7f8c4b53b2b2e97 kserve-llm-d-ibm-spyre: registry.redhat.io/rhaiis/vllm-spyre-rhel9@sha256:80ae3e435a5be2c1f117f36599103ab05357917dd6e37f0df6613cb3ac2c13ea kserve-llm-d-inference-scheduler: quay.io/opendatahub/llm-d-inference-scheduler:odh-stable kserve-llm-d-intel-gaudi: registry.redhat.io/rhaii-early-access/vllm-gaudi-rhel9:3.4.0-ea.2 kserve-llm-d-nvidia-cuda: registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 kserve-llm-d-routing-sidecar: quay.io/opendatahub/llm-d-routing-sidecar:odh-stable kserve-llm-d-uds-tokenizer: quay.io/opendatahub/llm-d-kv-cache:v0.7.1 kserve-localmodel-controller: quay.io/opendatahub/odh-kserve-localmodel-controller:odh-stable kserve-localmodelnode-agent: quay.io/opendatahub/odh-kserve-localmodelnode-agent:odh-stable kserve-router: quay.io/opendatahub/kserve-router@sha256:66a46d5a6623872616c85560bba9e0c6b4eaf29ca86ba636e84212e5017220b2 kserve-storage-initializer: quay.io/opendatahub/kserve-storage-initializer@sha256:1e3c9913c801ea01a5ba1aef752627a7bb4ea1c0e22f7d100bc3ea27e07d693d kube-rbac-proxy: quay.io/opendatahub/odh-kube-auth-proxy@sha256:dcb09fbabd8811f0956ef612a0c9ddd5236804b9bd6548a0647d2b531c9d01b3 llmisvc-controller: quay.io/opendatahub/odh-kserve-llmisvc-controller:odh-stable kind: ConfigMap metadata: creationTimestamp: "2026-04-23T16:40:59Z" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: f:kserve-agent: {} f:kserve-controller: {} f:kserve-llm-d: {} f:kserve-llm-d-amd-rocm: {} f:kserve-llm-d-ibm-spyre: {} f:kserve-llm-d-inference-scheduler: {} f:kserve-llm-d-intel-gaudi: {} 
f:kserve-llm-d-nvidia-cuda: {} f:kserve-llm-d-routing-sidecar: {} f:kserve-llm-d-uds-tokenizer: {} f:kserve-localmodel-controller: {} f:kserve-localmodelnode-agent: {} f:kserve-router: {} f:kserve-storage-initializer: {} f:kube-rbac-proxy: {} f:llmisvc-controller: {} manager: kubectl operation: Apply time: "2026-04-23T16:40:59Z" name: kserve-parameters namespace: kserve resourceVersion: "11748" uid: 8b2650fa-59f1-450f-85e8-b4d2c96a549a - apiVersion: v1 data: ca.crt: | -----BEGIN CERTIFICATE----- MIIDPDCCAiSgAwIBAgIIKaRa/0+CQqcwDQYJKoZIhvcNAQELBQAwJjESMBAGA1UE CxMJb3BlbnNoaWZ0MRAwDgYDVQQDEwdyb290LWNhMB4XDTI2MDQyMzE2Mjg0NloX DTM2MDQyMDE2Mjg0NlowJjESMBAGA1UECxMJb3BlbnNoaWZ0MRAwDgYDVQQDEwdy b290LWNhMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA867lw4BVWHYs aWux8edeG9j0/7MdkMj4nyK+XD5aMHw2prIdvJKRDxBMFEVOhLfcWHIO/jn9WVFx R9z35YabpmqSlqgm2zTj4ZXg7wB8Gl/jMgcoKL0turfuIpKShmXqKSiHTJDpFTU+ 2ENLEc0FZyXUF7sg+wXKfzfyQdDLv54Na2OPYvcZr8dsv91esHo4SPZnFMacB+LN HL/lwJ51qXhqXSTzOjzN+keJCrxD5ohfzHM+FmjPqrqmUp7vgXqzULkgS1XYEcF1 AbN+JBseq6CfH30CULnbqsMkuoL+P47IM82Ox4qLiFtFuoDkSevxyJaHzlTRkojU 3vegFSPpswIDAQABo24wbDAOBgNVHQ8BAf8EBAMCAqQwDwYDVR0TAQH/BAUwAwEB /zBJBgNVHQ4EQgRAHi4tXTXAQJ1RT5nMnNsDJuB2zeofk4ylIEYLQV00pk6PWkGa vGhp7a14LOcuq5TnssyTNrNygMcUM+rL5uptXTANBgkqhkiG9w0BAQsFAAOCAQEA EvUH7z7Cu5rAEeiT64XvoIxzJEr258lM6ppYuv2Mgej5BVDlrVYVyhvgHnQ4wBlm 4o0+0NT0I93/T6Usg+gp+u5cN1SzI095JRQRX8fOXaJ4Nr1Iq5DML+JpKyFE6Kv6 BcHXeavdqDXq4gUtoKxsiqPMmbi2wwWIXLtb9GKv+1GmeaawHXjyaOcJzcpzG8bM NOmNW/AphLOcsc9oQiEVZj/Hpyi3p7qfiOgvQmjQ+G82B3YIkI3mj9kck63S/UqV YQfxkdm62ONSp/Lsy4+ivHeCrEjk41qM8Q7kbbTQ9iQbDzFb65GVSplx0Nj7KSVo Mh0rjmd6BF96xjoxdEvv0w== -----END CERTIFICATE----- -----BEGIN CERTIFICATE----- MIIEADCCAuigAwIBAgIIG7AsFVmGITUwDQYJKoZIhvcNAQELBQAwJjESMBAGA1UE CxMJb3BlbnNoaWZ0MRAwDgYDVQQDEwdyb290LWNhMB4XDTI2MDQyMzE2MjkyMloX DTI3MDQyMzE2MjkyMlowMDESMBAGA1UEChMJb3BlbnNoaWZ0MRowGAYDVQQDExFv cGVuc2hpZnQtaW5ncmVzczCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB 
AMZtrqpexggdqWdfbbAp2NVp/OLwinLNW501mAGU0S+n+9gxf3pM7jZ4qCIn44B6 NpH3pk4Bz+DukHutEv26d/ZDFnVNFPVDpuEwHwGdDF5MLHIfA3aRwcyEqxqkEtYz Iu9SXl9HpDi5IvMIq2B3TOzD2lFafiN5C9nZBhSPnkJI6SKABc8IFMeG7JZuVpvR HTNiMMuTEFf5j8EcyMTmJ3cqZUxMP9KrYzG2NyvxjjczQoVq25PHTcHFObNSRr6O O6PIyMuTs2ByZ4yEQR2xik4svHZJZNQl2d4QA+0X200gVfMA5YOzb6AbGHxYuTV1 D5ZvShnlpWWziQey6kIol5ECAwEAAaOCASYwggEiMA4GA1UdDwEB/wQEAwIFoDAd BgNVHSUEFjAUBggrBgEFBQcDAgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADBJBgNV HQ4EQgRA8Y5hJ4REInX+O/4bOViY0IteW48P6L4JZU2NyiUj6ryKgKd8MtUp9lCL n4oEwsYczV/s273CkmgsrwAXa9yxMjBLBgNVHSMERDBCgEAeLi1dNcBAnVFPmcyc 2wMm4HbN6h+TjKUgRgtBXTSmTo9aQZq8aGntrXgs5y6rlOeyzJM2s3KAxxQz6svm 6m1dMEsGA1UdEQREMEKCQCouYXBwcy42ODU5NjBkNi0zZTIzLTQyZDYtYjE2MC1j ZTY5YWQ4ZjNkY2UucHJvZC5rb25mbHV4ZWFhcy5jb20wDQYJKoZIhvcNAQELBQAD ggEBAMk9aRXzhrFSU1m/y3KRbpqzpSZz7y+uKn8Aec8CDxSyI8Cbi/PTXYzdrNn1 fNJSpXoCWmITBxomfeQp5/Pnjig1K8MQ8Gbona4SBFbB91seUA6SdYyES4Q4JJT+ Ae5O/Y/4lOUh+C3z4khHjJR5U8q0id9L4MY8JEZTDSbzejQQeIg91TsxidtOwVT5 R/2nhyqF7dF9/nxuVpOQx+9FRL7Amuo4jfuRfQjzmKHQq17A2BpGUx1EN5zMhd/N aerYxHqJS0r+D3S76jF94wQ3StBtmW9UB59cSp33wvMHBWZjA/GfLVXxO5dZH+ya Dp5KuDbNBHu8d4WZZtCcSCifF5w= -----END CERTIFICATE----- kind: ConfigMap metadata: annotations: kubernetes.io/description: Contains a CA bundle that can be used to verify the kube-apiserver when using internal endpoints such as the internal service IP or kubernetes.default.svc. No other usage is guaranteed across distributions of Kubernetes clusters. 
creationTimestamp: "2026-04-23T16:40:44Z" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: .: {} f:ca.crt: {} f:metadata: f:annotations: .: {} f:kubernetes.io/description: {} manager: kube-controller-manager operation: Update time: "2026-04-23T16:40:44Z" name: kube-root-ca.crt namespace: kserve resourceVersion: "11552" uid: b14167b4-5a2c-4a4c-a0f9-7555fa12d9c2 - apiVersion: v1 data: cabundle.crt: |- -----BEGIN CERTIFICATE----- MIIDUTCCAjmgAwIBAgIIdCtbzthRtI4wDQYJKoZIhvcNAQELBQAwNjE0MDIGA1UE Awwrb3BlbnNoaWZ0LXNlcnZpY2Utc2VydmluZy1zaWduZXJAMTc3Njk2MjEzNjAe Fw0yNjA0MjMxNjM1MzZaFw0yODA2MjExNjM1MzdaMDYxNDAyBgNVBAMMK29wZW5z aGlmdC1zZXJ2aWNlLXNlcnZpbmctc2lnbmVyQDE3NzY5NjIxMzYwggEiMA0GCSqG SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCn+D1c7/T1LZsAQJ+FEoUrmqJ8yuOxvoa8 7M0rnmqBu1lb+uZQtynLmnbSCoqoEDPWl2aUKr0ZlUkgAOUj6rStcWzoIq+FfXgP 9wWqtLiudYsLp4hbNLw1/BbHOCKKzdmoUq0d5PAXAjXF+A/28Z0sf7egwkClNG2S eRwS5SKvB7YjZdU0A7s/9PhyOvL7dyPNTnq304B8+SmE90ZcEzbiN11q4GN9rcml y75+pLglAAd4EJW4CExrmRa/TTCmqddVLyaSVDH1mbOsQPcMeXgpeGkEYPk7TAci UK4M63b8HDxIrTJkIPlhka/xKzAdKw7W1UZY1n4J9Akup3+RVGo3AgMBAAGjYzBh MA4GA1UdDwEB/wQEAwICpDAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBQm4vBE rR9zUdR44jE9DalC756tXDAfBgNVHSMEGDAWgBQm4vBErR9zUdR44jE9DalC756t XDANBgkqhkiG9w0BAQsFAAOCAQEAi9vym1EAU3MUHs+nORjQw3pOqtKq7MxyE/7w //bVf8DqYQAVO92u1ltFFvQ/kFSgmCLc4J85j36FjYKpIfIek2KRrWz5q7/ebBj5 PXR4RP/gLd1dAJs8BILZ17mbVB2J8VBCib0EaxwdWlUz7edbILzO5+gez3kuc5TF o27pfcIwa4QJIJU+iP8/ceaV5aX5IQC50PVGuAkjOEusulfaWcLp+jgC/H7BG/h3 GnKztEiXmMv8PNinHiN21ejE9xYdd4SL+e1c96Olw27c3xb58p5vVdIV3JntT6aN 5/epFwv5VXk0xdG7F5r4s0r4uQKUlLoEbOBFCqRoIhaAoFyrYA== -----END CERTIFICATE----- kind: ConfigMap metadata: creationTimestamp: "2026-04-23T16:42:15Z" labels: opendatahub.io/managed: "true" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: .: {} f:cabundle.crt: {} f:metadata: f:labels: .: {} f:opendatahub.io/managed: {} manager: manager operation: Update time: "2026-04-23T16:42:15Z" name: odh-kserve-custom-ca-bundle 
namespace: kserve resourceVersion: "12624" uid: d2243ef1-b1de-48d7-b9c7-35064e74cc88 - apiVersion: v1 data: guardrails-detector-huggingface-runtime-image: quay.io/trustyai/guardrails-detector-huggingface-runtime:latest kserve-state: managed mlserver-image: quay.io/opendatahub/mlserver:fast modelregistry-state: removed nim-state: managed odh-model-controller: quay.io/opendatahub/odh-model-controller:fast odh-model-serving-api: quay.io/opendatahub/odh-model-controller:odh-model-serving-api-fast ovms-image: quay.io/opendatahub/openvino_model_server:2025.1-release ray-tls-generator-image: registry.redhat.io/ubi9/ubi-minimal:latest tgis-image: quay.io/opendatahub/text-generation-inference:fast vllm-cpu-image: quay.io/vllm/vllm:latest vllm-cpu-x86-image: quay.io/vllm/vllm:latest vllm-cuda-image: quay.io/vllm/vllm-cuda:latest vllm-gaudi-image: quay.io/opendatahub/vllm:fast-gaudi vllm-rocm-image: quay.io/vllm/vllm-rocm:latest vllm-spyre-image: quay.io/vllm/vllm:latest kind: ConfigMap metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | 
{"apiVersion":"v1","data":{"guardrails-detector-huggingface-runtime-image":"quay.io/trustyai/guardrails-detector-huggingface-runtime:latest","kserve-state":"managed","mlserver-image":"quay.io/opendatahub/mlserver:fast","modelregistry-state":"removed","nim-state":"managed","odh-model-controller":"quay.io/opendatahub/odh-model-controller:fast","odh-model-serving-api":"quay.io/opendatahub/odh-model-controller:odh-model-serving-api-fast","ovms-image":"quay.io/opendatahub/openvino_model_server:2025.1-release","ray-tls-generator-image":"registry.redhat.io/ubi9/ubi-minimal:latest","tgis-image":"quay.io/opendatahub/text-generation-inference:fast","vllm-cpu-image":"quay.io/vllm/vllm:latest","vllm-cpu-x86-image":"quay.io/vllm/vllm:latest","vllm-cuda-image":"quay.io/vllm/vllm-cuda:latest","vllm-gaudi-image":"quay.io/opendatahub/vllm:fast-gaudi","vllm-rocm-image":"quay.io/vllm/vllm-rocm:latest","vllm-spyre-image":"quay.io/vllm/vllm:latest"},"kind":"ConfigMap","metadata":{"annotations":{},"name":"odh-model-controller-parameters","namespace":"kserve"}} creationTimestamp: "2026-04-23T16:42:11Z" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: .: {} f:guardrails-detector-huggingface-runtime-image: {} f:kserve-state: {} f:mlserver-image: {} f:modelregistry-state: {} f:nim-state: {} f:odh-model-controller: {} f:odh-model-serving-api: {} f:ovms-image: {} f:ray-tls-generator-image: {} f:tgis-image: {} f:vllm-cpu-image: {} f:vllm-cpu-x86-image: {} f:vllm-cuda-image: {} f:vllm-gaudi-image: {} f:vllm-rocm-image: {} f:vllm-spyre-image: {} f:metadata: f:annotations: .: {} f:kubectl.kubernetes.io/last-applied-configuration: {} manager: kubectl-client-side-apply operation: Update time: "2026-04-23T16:42:11Z" name: odh-model-controller-parameters namespace: kserve resourceVersion: "12490" uid: 72a15d2f-851d-492f-8394-0a67c0b4c47b - apiVersion: v1 data: service-ca.crt: | -----BEGIN CERTIFICATE----- MIIDUTCCAjmgAwIBAgIIdCtbzthRtI4wDQYJKoZIhvcNAQELBQAwNjE0MDIGA1UE 
Awwrb3BlbnNoaWZ0LXNlcnZpY2Utc2VydmluZy1zaWduZXJAMTc3Njk2MjEzNjAe Fw0yNjA0MjMxNjM1MzZaFw0yODA2MjExNjM1MzdaMDYxNDAyBgNVBAMMK29wZW5z aGlmdC1zZXJ2aWNlLXNlcnZpbmctc2lnbmVyQDE3NzY5NjIxMzYwggEiMA0GCSqG SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCn+D1c7/T1LZsAQJ+FEoUrmqJ8yuOxvoa8 7M0rnmqBu1lb+uZQtynLmnbSCoqoEDPWl2aUKr0ZlUkgAOUj6rStcWzoIq+FfXgP 9wWqtLiudYsLp4hbNLw1/BbHOCKKzdmoUq0d5PAXAjXF+A/28Z0sf7egwkClNG2S eRwS5SKvB7YjZdU0A7s/9PhyOvL7dyPNTnq304B8+SmE90ZcEzbiN11q4GN9rcml y75+pLglAAd4EJW4CExrmRa/TTCmqddVLyaSVDH1mbOsQPcMeXgpeGkEYPk7TAci UK4M63b8HDxIrTJkIPlhka/xKzAdKw7W1UZY1n4J9Akup3+RVGo3AgMBAAGjYzBh MA4GA1UdDwEB/wQEAwICpDAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBQm4vBE rR9zUdR44jE9DalC756tXDAfBgNVHSMEGDAWgBQm4vBErR9zUdR44jE9DalC756t XDANBgkqhkiG9w0BAQsFAAOCAQEAi9vym1EAU3MUHs+nORjQw3pOqtKq7MxyE/7w //bVf8DqYQAVO92u1ltFFvQ/kFSgmCLc4J85j36FjYKpIfIek2KRrWz5q7/ebBj5 PXR4RP/gLd1dAJs8BILZ17mbVB2J8VBCib0EaxwdWlUz7edbILzO5+gez3kuc5TF o27pfcIwa4QJIJU+iP8/ceaV5aX5IQC50PVGuAkjOEusulfaWcLp+jgC/H7BG/h3 GnKztEiXmMv8PNinHiN21ejE9xYdd4SL+e1c96Olw27c3xb58p5vVdIV3JntT6aN 5/epFwv5VXk0xdG7F5r4s0r4uQKUlLoEbOBFCqRoIhaAoFyrYA== -----END CERTIFICATE----- kind: ConfigMap metadata: annotations: service.beta.openshift.io/inject-cabundle: "true" creationTimestamp: "2026-04-23T16:40:44Z" managedFields: - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: {} f:metadata: f:annotations: .: {} f:service.beta.openshift.io/inject-cabundle: {} manager: kube-controller-manager operation: Update time: "2026-04-23T16:40:44Z" - apiVersion: v1 fieldsType: FieldsV1 fieldsV1: f:data: f:service-ca.crt: {} manager: service-ca-operator operation: Update time: "2026-04-23T16:40:44Z" name: openshift-service-ca.crt namespace: kserve resourceVersion: "11561" uid: c38c10a4-d676-48e4-b2df-71a2850b7114 kind: ConfigMapList metadata: resourceVersion: "41019"