{"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"8a227bd3-ed4f-4b39-9276-c8337b313e49","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:32Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"5855e953-8e2a-4343-8eac-3014cce05ec9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:17:33Z","msg":"reconcile","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"384fa36c-8a08-42a7-9665-1f9f3986fccf"} {"level":"info","ts":"2026-04-22T21:17:33Z","msg":"apply","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"384fa36c-8a08-42a7-9665-1f9f3986fccf"} {"level":"info","ts":"2026-04-22T21:17:33Z","msg":"Executing action","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"384fa36c-8a08-42a7-9665-1f9f3986fccf","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:17:33Z","msg":"Reconciler error","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"384fa36c-8a08-42a7-9665-1f9f3986fccf","error":"provisioning failed: error fetching list of deployments: unable to list: opendatahub-monitoring because of unknown namespace for the cache","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:17:40Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"62b88e89-8dc8-494f-8c90-8504e6884252","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:17:44Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"kuadrant-system","reconcileID":"6f943551-a84a-4df6-8eb3-926b219e7daf"} {"level":"info","ts":"2026-04-22T21:17:44Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"kuadrant-system","reconcileID":"d69e3a65-edff-40ba-aff7-a26ec68e8422"} {"level":"info","ts":"2026-04-22T21:17:44Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"kuadrant-system","reconcileID":"4ef67c37-7e00-4421-bd31-32f0ca8ff325"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:18:27Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"0d17a88a-7b5e-4425-890d-9f92eed23734","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:19:43Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"models-as-a-service","reconcileID":"c203e853-8257-47c9-8cc9-c98ea046ca04"} {"level":"info","ts":"2026-04-22T21:19:43Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"models-as-a-service","reconcileID":"5d67ec9c-c0b3-457a-9b66-735f2af9e4db"} {"level":"info","ts":"2026-04-22T21:19:43Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"models-as-a-service","reconcileID":"1d8fe305-c241-4e6f-9d48-58ac1e551414"} {"level":"info","ts":"2026-04-22T21:19:45Z","msg":"reconcile","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"79e57856-fd90-48fd-9453-ae04907b8c5c"} {"level":"info","ts":"2026-04-22T21:19:45Z","msg":"apply","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"79e57856-fd90-48fd-9453-ae04907b8c5c"} {"level":"info","ts":"2026-04-22T21:19:45Z","msg":"Executing action","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"79e57856-fd90-48fd-9453-ae04907b8c5c","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:19:45Z","msg":"Reconciler error","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"79e57856-fd90-48fd-9453-ae04907b8c5c","error":"provisioning failed: error fetching list of deployments: unable to list: opendatahub-monitoring because of unknown namespace for the cache","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:20:09Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"llm","reconcileID":"5d649126-24dd-4c27-bf72-12d0e03e61fa"} {"level":"info","ts":"2026-04-22T21:20:09Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"llm","reconcileID":"ee57873b-e059-4b56-b431-a4ae313ce1f9"} {"level":"info","ts":"2026-04-22T21:20:09Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"llm","reconcileID":"cf9150a8-6520-46f8-a085-a79fbf068f9f"} {"level":"info","ts":"2026-04-22T21:20:28Z","msg":"reconcile","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"ea83b4ca-1252-4177-8184-2cbfeb88b40b"} {"level":"info","ts":"2026-04-22T21:20:28Z","msg":"apply","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"ea83b4ca-1252-4177-8184-2cbfeb88b40b"} {"level":"info","ts":"2026-04-22T21:20:28Z","msg":"Executing action","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"ea83b4ca-1252-4177-8184-2cbfeb88b40b","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:20:28Z","msg":"Reconciler error","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"ea83b4ca-1252-4177-8184-2cbfeb88b40b","error":"provisioning failed: error fetching list of deployments: unable to list: opendatahub-monitoring because of unknown namespace for the cache","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:21:44Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"premium-users-namespace","reconcileID":"44f43c85-0148-4b98-ac38-4c57345684c6"} {"level":"info","ts":"2026-04-22T21:21:44Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"premium-users-namespace","reconcileID":"9a9333c9-9507-44b1-99dd-82717d7720be"} {"level":"info","ts":"2026-04-22T21:21:44Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"premium-users-namespace","reconcileID":"e8f3e16e-fc38-42d3-b4f7-6a6dceb567d0"} {"level":"info","ts":"2026-04-22T21:21:45Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"maas-admin","reconcileID":"b18f41d6-c149-4998-b925-920065892d27"} {"level":"info","ts":"2026-04-22T21:21:45Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"maas-admin","reconcileID":"7321080f-a3e8-4d67-9bc1-15a74d4a1ce5"} {"level":"info","ts":"2026-04-22T21:21:45Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"maas-admin","reconcileID":"d82b698b-12c5-4d84-bdae-90f1266cdeca"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"reconcile","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"apply","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/auth.initialize"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/auth.createDefaultGroup"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/auth.managePermissions"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"45356cd9-f463-4b8c-a526-676b278a4de7","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"reconcile","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"apply","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/auth.initialize"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/auth.createDefaultGroup"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/auth.managePermissions"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:21:46Z","msg":"Executing action","controller":"auth","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Auth","Auth":{"name":"auth"},"namespace":"","name":"auth","reconcileID":"065d0d7e-92df-4fc5-bd48-a7a90376a532","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:22:10Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"44467744-fae1-4bd8-8db2-38eb7363ed66","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:22:25Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"33560c72-641a-4426-aa88-a720f223b1a1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:23:39Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-api-unwatched-ns","reconcileID":"c0970005-8119-47e6-8fbf-7a8d3d38a5f4"} {"level":"info","ts":"2026-04-22T21:23:39Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-api-unwatched-ns","reconcileID":"46f088a2-8262-4d45-84f6-7fa33565657a"} {"level":"info","ts":"2026-04-22T21:23:39Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-api-unwatched-ns","reconcileID":"8724ea50-3d07-4ca4-b173-25c3777b05a0"} {"level":"info","ts":"2026-04-22T21:24:25Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-unwatched-ns","reconcileID":"b50d9051-a46b-41aa-b9df-0f8c41212322"} {"level":"info","ts":"2026-04-22T21:24:25Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-unwatched-ns","reconcileID":"5613e91f-2f78-4339-9f17-c4307b5f895b"} {"level":"info","ts":"2026-04-22T21:24:25Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-unwatched-ns","reconcileID":"e5b67175-cf3b-4c90-b521-874eea1bc877"} {"level":"info","ts":"2026-04-22T21:24:55Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-modelref-beca4f","reconcileID":"9640a1b4-0e44-4e7d-9f67-57bf8abaa98f"} {"level":"info","ts":"2026-04-22T21:24:55Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-modelref-beca4f","reconcileID":"92c0fe98-cbce-4d76-baa9-c708acd633ae"} {"level":"info","ts":"2026-04-22T21:24:55Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-modelref-beca4f","reconcileID":"853f5513-a15d-4bf1-8547-cef0faf84dff"} {"level":"info","ts":"2026-04-22T21:25:25Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-modelref-9a9871","reconcileID":"a1c87646-b4df-4f39-8a61-0980127099e5"} {"level":"info","ts":"2026-04-22T21:25:25Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-modelref-9a9871","reconcileID":"86eeeca5-d4b7-483e-8086-ed807aa1c820"} {"level":"info","ts":"2026-04-22T21:25:25Z","msg":"Adding CA bundle configmap","controller":"cert-configmap-generator-controller","namespace":"","name":"e2e-modelref-9a9871","reconcileID":"9edf7c3f-8f86-4a25-9477-c25f0bfb150f"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:30:58Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:01Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1065e6ba-5a93-4799-85ed-2d1d58bb8bb1","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"2f8396a0-f326-4c38-8854-bc466ba541b1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"reconcile","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"25e2c112-344b-4c08-badf-af7b13b0dd54"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"apply","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"25e2c112-344b-4c08-badf-af7b13b0dd54"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"25e2c112-344b-4c08-badf-af7b13b0dd54","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:01Z","msg":"Reconciler error","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"25e2c112-344b-4c08-badf-af7b13b0dd54","error":"provisioning failed: error fetching list of deployments: unable to list: opendatahub-monitoring because of unknown namespace for the cache","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"error","ts":"2026-04-22T21:31:01Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"358181e8-69f1-4040-a5e5-583a0b5e4567","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c7bf338b-db84-46c2-b8c5-b75332622c3a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"error","ts":"2026-04-22T21:31:01Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"c76885f4-e935-4025-9bef-02955fef6d7a","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:01Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:02Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"b60be3c1-0c5e-486b-a9e8-4bef258a71cc","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"e7e3d813-41ed-4ff6-9f00-3eedd7b0ff9b","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:02Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"1d8a70ed-68a0-4e12-bd6d-d4c99946c833","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:02Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:02Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"ad23f161-9155-4523-9034-f49ff7a36843","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"54427cc5-1985-4a9c-b587-e9c892ff1e61","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"error","ts":"2026-04-22T21:31:03Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"d9bf5130-c327-454e-90de-c24c0b046a68","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:03Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:04Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"2d88162a-69e7-4ac8-bd61-783962c81115","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"a482c957-b82a-41e2-9782-df5a38ef8103","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:04Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"382cb444-c2fd-4db2-8f17-3ccf8d8a0090","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"772e46d5-d80c-41ed-a472-fcb6642dd4d5","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:04Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:05Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"3721913c-6756-4033-9fc2-8a722d74e6a9","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:06Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"bc9ea7aa-01de-48a4-acc8-89133ed54177","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:06Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"dcf437ab-32b1-45d8-9159-2dc801a53b3e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:09Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"47c80aaf-d9b4-4d8a-a142-d61582885e4c","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:09Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"78af0f03-9bd5-4e5e-acf5-1e288b983f93","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:14Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"3e0de8a1-7398-4f25-999c-18fae0cdf018","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:14Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"c8493a87-73b2-4c00-b406-58d9aeb03688","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:24Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:25Z","msg":"Reconciler error","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"44693994-e3a8-4580-bd8b-e4ecc71d15ce","error":"provisioning failed: failure deploying resource {map[apiVersion:serving.kserve.io/v1alpha1 kind:LLMInferenceServiceConfig metadata:map[annotations:map[internal.config.kubernetes.io/previousKinds:LLMInferenceServiceConfig internal.config.kubernetes.io/previousNames:kserve-config-llm-decode-template internal.config.kubernetes.io/previousNamespaces:opendatahub platform.opendatahub.io/instance.generation:1 platform.opendatahub.io/instance.name:default-kserve platform.opendatahub.io/instance.uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea platform.opendatahub.io/type:Open Data Hub platform.opendatahub.io/version:3.4.0-ea.1 serving.kserve.io/well-known-config:true] labels:map[app.kubernetes.io/part-of:kserve app.opendatahub.io/kserve:true platform.opendatahub.io/part-of:kserve] name:v3-4-0-ea-1-kserve-config-llm-decode-template namespace:opendatahub ownerReferences:[map[apiVersion:components.platform.opendatahub.io/v1alpha1 blockOwnerDeletion:%!s(bool=true) controller:%!s(bool=true) kind:Kserve name:default-kserve uid:c5b97c2e-9948-41e9-a264-e6ec7ba8e1ea]]] spec:map[template:map[containers:[map[args:[if [ \"$KSERVE_INFER_ROCE\" = \"true\" ]; then\n echo \"Trying to infer RoCE configs ... \"\n grep -H . /sys/class/infiniband/*/ports/*/gids/* 2>/dev/null\n grep -H . /sys/class/infiniband/*/ports/*/gid_attrs/types/* 2>/dev/null\n\n cat /proc/driver/nvidia/params\n\n KSERVE_INFER_IB_GID_INDEX_GREP=${KSERVE_INFER_IB_GID_INDEX_GREP:-\"RoCE v2\"}\n\n echo \"[Infer RoCE] Discovering active HCAs ...\"\n active_hcas=()\n # Loop through all mlx5 devices found in sysfs\n for hca_dir in /sys/class/infiniband/mlx5_*; do\n # Ensure it's a directory before proceeding\n if [ -d \"$hca_dir\" ]; then\n hca_name=$(basename \"$hca_dir\")\n port_state_file=\"$hca_dir/ports/1/state\" # Assume port 1\n type_file=\"$hca_dir/ports/1/gid_attrs/types/*\"\n\n echo \"[Infer RoCE] Check if the port state file ${port_state_file} exists and contains 'ACTIVE'\"\n if [ -f \"$port_state_file\" ] && grep -q \"ACTIVE\" \"$port_state_file\" && grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" ${type_file} 2>/dev/null; then\n echo \"[Infer RoCE] Found active HCA: $hca_name\"\n active_hcas+=(\"$hca_name\")\n else\n echo \"[Infer RoCE] Skipping inactive or down HCA: $hca_name\"\n fi\n fi\n done\n\n ucx_hcas=()\n for hca in \"${active_hcas[@]}\"; do\n ucx_hcas+=(\"${hca}:1\")\n done\n\n # Check if we found any active HCAs\n if [ ${#active_hcas[@]} -gt 0 ]; then\n # Join the array elements with a comma\n hcas=$(IFS=,; echo \"${active_hcas[*]}\")\n echo \"[Infer RoCE] Setting active HCAs: ${hcas}\"\n export NCCL_IB_HCA=${NCCL_IB_HCA:-${hcas}}\n export NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST:-${ucx_hcas}}\n export UCX_NET_DEVICES=${UCX_NET_DEVICES:-${ucx_hcas}}\n\n echo \"[Infer RoCE] NCCL_IB_HCA=${NCCL_IB_HCA}\"\n echo \"[Infer RoCE] NVSHMEM_HCA_LIST=${NVSHMEM_HCA_LIST}\"\n else\n echo \"[Infer RoCE] WARNING: No active RoCE HCAs found. NCCL_IB_HCA will not be set.\"\n fi\n\n if [ ${#active_hcas[@]} -gt 0 ]; then\n echo \"[Infer RoCE] Finding GID_INDEX for each active HCA (SR-IOV compatible)...\"\n\n # For SR-IOV environments, find the most common IPv4 RoCE v2 GID index across all HCAs\n declare -A gid_index_count\n declare -A hca_gid_index\n\n for hca_name in \"${active_hcas[@]}\"; do\n echo \"[Infer RoCE] Processing HCA: ${hca_name}\"\n\n # Find all RoCE v2 IPv4 GIDs for this HCA and count by index\n for tpath in /sys/class/infiniband/${hca_name}/ports/1/gid_attrs/types/*; do\n if grep -q \"${KSERVE_INFER_IB_GID_INDEX_GREP}\" \"$tpath\" 2>/dev/null; then\n idx=$(basename \"$tpath\")\n gid_file=\"/sys/class/infiniband/${hca_name}/ports/1/gids/${idx}\"\n # Check for IPv4 GID (contains ffff:)\n if [ -f \"$gid_file\" ] && grep -q \"ffff:\" \"$gid_file\"; then\n gid_value=$(cat \"$gid_file\" 2>/dev/null || echo \"\")\n echo \"[Infer RoCE] Found IPv4 RoCE v2 GID for ${hca_name}: index=${idx}, gid=${gid_value}\"\n hca_gid_index[\"${hca_name}\"]=\"${idx}\"\n gid_index_count[\"${idx}\"]=$((${gid_index_count[\"${idx}\"]} + 1))\n break # Use first found IPv4 GID per HCA\n fi\n fi\n done\n done\n\n # Find the most common GID index (most likely to be consistent across nodes)\n best_gid_index=\"\"\n max_count=0\n for idx in \"${!gid_index_count[@]}\"; do\n count=${gid_index_count[\"${idx}\"]}\n echo \"[Infer RoCE] GID_INDEX ${idx} found on ${count} HCAs\"\n if [ $count -gt $max_count ]; then\n max_count=$count\n best_gid_index=\"$idx\"\n fi\n done\n\n # Use deterministic fallback if counts are equal - prefer lower index number \n if [ ${#gid_index_count[@]} -gt 1 ]; then\n echo \"[Infer RoCE] Multiple GID indices found, selecting most common: ${best_gid_index}\"\n # If there's a tie, prefer index 3 as it's most common in SR-IOV setups\n if [ -n \"${gid_index_count['3']}\" ] && [ \"${gid_index_count['3']}\" -eq \"$max_count\" ]; then\n best_gid_index=\"3\"\n echo \"[Infer RoCE] Using deterministic fallback: GID_INDEX=3 (SR-IOV standard)\"\n fi\n fi\n\n # Check if GID_INDEX is already set via environment variables\n if [ -n \"${NCCL_IB_GID_INDEX}\" ]; then\n echo \"[Infer RoCE] Using pre-configured NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX} from environment\"\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$NCCL_IB_GID_INDEX}\n echo \"[Infer RoCE] Using hardcoded GID_INDEX=${NCCL_IB_GID_INDEX} for NCCL, NVSHMEM, and UCX\"\n elif [ -n \"$best_gid_index\" ]; then\n echo \"[Infer RoCE] Selected GID_INDEX: ${best_gid_index} (found on ${max_count} HCAs)\"\n\n export NCCL_IB_GID_INDEX=${NCCL_IB_GID_INDEX:-$best_gid_index}\n export NVSHMEM_IB_GID_INDEX=${NVSHMEM_IB_GID_INDEX:-$best_gid_index}\n export UCX_IB_GID_INDEX=${UCX_IB_GID_INDEX:-$best_gid_index}\n\n echo \"[Infer RoCE] Exported GID_INDEX=${best_gid_index} for NCCL, NVSHMEM, and UCX\"\n else\n echo \"[Infer RoCE] ERROR: No valid IPv4 ${KSERVE_INFER_IB_GID_INDEX_GREP} GID_INDEX found on any HCA.\"\n fi\n else\n echo \"[Infer RoCE] No active HCAs found, skipping GID_INDEX inference.\"\n fi\nfi\n\neval \"vllm serve /mnt/models \\\n --served-model-name \"{{ .Spec.Model.Name }}\" \\\n --port 8001 \\\n ${VLLM_ADDITIONAL_ARGS} \\\n --enable-ssl-refresh \\\n --ssl-certfile /var/run/kserve/tls/tls.crt \\\n --ssl-keyfile /var/run/kserve/tls/tls.key\"] command:[/bin/bash -c] env:[map[name:HOME value:/home] map[name:VLLM_LOGGING_LEVEL value:INFO] map[name:HF_HUB_CACHE value:/models]] image:registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=120) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:main ports:[map[containerPort:%!s(int64=8001) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=60) httpGet:map[path:/health port:%!s(int64=8001) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true) seccompProfile:map[type:RuntimeDefault]] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/home name:home] map[mountPath:/dev/shm name:dshm] map[mountPath:/models name:model-cache] map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] initContainers:[map[args:[--port=8000 --vllm-port=8001 --connector=nixlv2 --secure-proxy=true --cert-path=/var/run/kserve/tls --decoder-use-tls=true --prefiller-use-tls=true --enable-ssrf-protection=true --pool-group=inference.networking.x-k8s.io] env:[map[name:INFERENCE_POOL_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:SSL_CERT_DIR value:/var/run/kserve/tls:/var/run/secrets/kubernetes.io/serviceaccount:/etc/pki/tls/certs]] image:quay.io/opendatahub/llm-d-routing-sidecar:release-v0.4 imagePullPolicy:IfNotPresent livenessProbe:map[failureThreshold:%!s(int64=3) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=10)] name:llm-d-routing-sidecar ports:[map[containerPort:%!s(int64=8000) protocol:TCP]] readinessProbe:map[failureThreshold:%!s(int64=10) httpGet:map[path:/health port:%!s(int64=8000) scheme:HTTPS] initialDelaySeconds:%!s(int64=10) periodSeconds:%!s(int64=10) timeoutSeconds:%!s(int64=5)] resources:map[] restartPolicy:Always securityContext:map[allowPrivilegeEscalation:%!s(bool=false) capabilities:map[drop:[ALL]] readOnlyRootFilesystem:%!s(bool=false) runAsNonRoot:%!s(bool=true)] terminationMessagePath:/dev/termination-log terminationMessagePolicy:FallbackToLogsOnError volumeMounts:[map[mountPath:/var/run/kserve/tls name:tls-certs readOnly:%!s(bool=true)]]]] terminationGracePeriodSeconds:%!s(int64=30) volumes:[map[emptyDir:map[] name:home] map[emptyDir:map[medium:Memory sizeLimit:1Gi] name:dshm] map[emptyDir:map[] name:model-cache] map[name:tls-certs secret:map[secretName:{{ ChildName .ObjectMeta.Name `-kserve-self-signed-certs` }}]]]]]]}: apply failed serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig: unable to patch serving.kserve.io/v1alpha1, Kind=LLMInferenceServiceConfig opendatahub/v3-4-0-ea-1-kserve-config-llm-decode-template: Internal error occurred: failed calling webhook \"llminferenceserviceconfig.kserve-webhook-server.validator\": failed to call webhook: Post \"https://kserve-webhook-server-service.opendatahub.svc:443/validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig?timeout=10s\": no endpoints available for service \"kserve-webhook-server-service\"","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:25Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"b6e357da-6caf-49fd-b5d7-e01a9d1ebabf","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:30Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"fa6bef43-9140-4e3e-bdc6-0bbd505da60e","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"da066ef7-7ed1-42cc-a7fc-25977b51dee1","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"reconcile","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"1bac0cda-b84e-4c71-96a7-ac36f049ffda"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"apply","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"1bac0cda-b84e-4c71-96a7-ac36f049ffda"} {"level":"info","ts":"2026-04-22T21:31:31Z","msg":"Executing action","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"1bac0cda-b84e-4c71-96a7-ac36f049ffda","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:31:31Z","msg":"Reconciler error","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"1bac0cda-b84e-4c71-96a7-ac36f049ffda","error":"provisioning failed: error fetching list of deployments: unable to list: opendatahub-monitoring because of unknown namespace for the cache","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"reconcile","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"apply","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.initialize"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/releases.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.removeOwnershipFromUnmanagedResources"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.cleanUpTemplatedResources"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.customizeKserveConfigMap"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/components/kserve.(*componentHandler).NewComponentReconciler.func1"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"kserve","controllerGroup":"components.platform.opendatahub.io","controllerKind":"Kserve","Kserve":{"name":"default-kserve"},"namespace":"","name":"default-kserve","reconcileID":"480c30e7-db73-48c1-8886-b569aa2b67ff","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"reconcile","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"apply","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.initialize"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.checkPreConditions"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.updateStatus"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/datasciencecluster.provisionComponents"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:31:45Z","msg":"Executing action","controller":"datasciencecluster","controllerGroup":"datasciencecluster.opendatahub.io","controllerKind":"DataScienceCluster","DataScienceCluster":{"name":"default-dsc"},"namespace":"","name":"default-dsc","reconcileID":"d2fef748-b135-402a-b5d5-d0a74c943979","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2360e757-cbc9-4da0-85c1-b5d97ae0c850","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:11Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"2827b7f2-2393-485f-b923-bd07a3edd41a","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"reconcile","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"apply","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createGatewayInfrastructure"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createKubeAuthProxyInfrastructure"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createEnvoyFilter"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createNetworkPolicy"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.createOCPRoutes"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/template.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/services/gateway.syncGatewayConfigStatus"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/gc.(*Action).run-fm"} {"level":"info","ts":"2026-04-22T21:33:26Z","msg":"Executing action","controller":"gatewayconfig","controllerGroup":"services.platform.opendatahub.io","controllerKind":"GatewayConfig","GatewayConfig":{"name":"default-gateway"},"namespace":"","name":"default-gateway","reconcileID":"fbeaecda-e358-42cf-a88b-49609513b590","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler.(*dynamicWatchAction).run-fm"} {"level":"info","ts":"2026-04-22T21:34:13Z","msg":"reconcile","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"545999dc-3acd-4657-bdc2-b05e39c511be"} {"level":"info","ts":"2026-04-22T21:34:13Z","msg":"apply","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"545999dc-3acd-4657-bdc2-b05e39c511be"} {"level":"info","ts":"2026-04-22T21:34:13Z","msg":"Executing action","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"545999dc-3acd-4657-bdc2-b05e39c511be","action":"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/status/deployments.(*Action).run-fm"} {"level":"error","ts":"2026-04-22T21:34:13Z","msg":"Reconciler error","controller":"monitoring","controllerGroup":"services.platform.opendatahub.io","controllerKind":"Monitoring","Monitoring":{"name":"default-monitoring"},"namespace":"","name":"default-monitoring","reconcileID":"545999dc-3acd-4657-bdc2-b05e39c511be","error":"provisioning failed: error fetching list of deployments: unable to list: opendatahub-monitoring because of unknown namespace for the cache","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:347\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/opt/app-root/src/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.20.4/pkg/internal/controller/controller.go:255"}