{"status":"success","data":{"groups":[{"name":"node-tuning-operator.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-node-tuning-operator-node-tuning-operator-29c1c7ff-525d-478c-8b94-2539548fd34c.yaml","rules":[{"state":"inactive","name":"NTOPodsNotReady","query":"kube_pod_status_ready{condition=\"true\",namespace=\"openshift-cluster-node-tuning-operator\"} == 0","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Pod {{ $labels.pod }} is not ready.\nReview the \"Event\" objects in \"openshift-cluster-node-tuning-operator\" namespace for further details.\n","summary":"Pod {{ $labels.pod }} is not ready."},"alerts":[],"health":"ok","evaluationTime":0.000227826,"lastEvaluation":"2026-04-23T09:27:45.527305178Z","type":"alerting"},{"state":"inactive","name":"NTOPodLabelsUsed","query":"nto_pod_labels_used_info == 1","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The Node Tuning Operator is using deprecated functionality. Using pod label matching has been discouraged since OCP 4.4 and this functionality will be removed in future versions. Please revise and adjust your configuration (Tuned custom resources).","summary":"The Node Tuning Operator is using deprecated functionality."},"alerts":[],"health":"ok","evaluationTime":0.000052063,"lastEvaluation":"2026-04-23T09:27:45.527539423Z","type":"alerting"},{"state":"inactive","name":"NTOInvalidTunedExist","query":"nto_invalid_tuned_exist_info == 1","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Invalid custom Tuned resource exists. View your custom Tuned resources and operator logs for further details.","summary":"Invalid custom Tuned resource exists."},"alerts":[],"health":"ok","evaluationTime":0.000036049,"lastEvaluation":"2026-04-23T09:27:45.527594501Z","type":"alerting"},{"state":"inactive","name":"NTODegraded","query":"nto_degraded_info == 1","duration":7200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The Node Tuning Operator is degraded. Review the \"node-tuning\" ClusterOperator object for further details.","summary":"The Node Tuning Operator is degraded."},"alerts":[],"health":"ok","evaluationTime":0.000034031,"lastEvaluation":"2026-04-23T09:27:45.527634027Z","type":"alerting"},{"name":"nto_custom_profiles:count","query":"count by (_id) (nto_profile_calculated_total{profile!~\"openshift\",profile!~\"openshift-control-plane\",profile!~\"openshift-node\"})","health":"ok","evaluationTime":0.000051578,"lastEvaluation":"2026-04-23T09:27:45.527670992Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000442483,"lastEvaluation":"2026-04-23T09:27:45.527283703Z"},{"name":"SamplesOperator","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-samples-operator-samples-operator-alerts-73b178de-2784-4fca-a8e7-815947678c8a.yaml","rules":[{"state":"inactive","name":"SamplesRetriesMissingOnImagestreamImportFailing","query":"sum(openshift_samples_failed_imagestream_import_info) \u003e sum(openshift_samples_retry_imagestream_import_total) - sum(openshift_samples_retry_imagestream_import_total offset 30m)","duration":7200,"keepFiringFor":0,"labels":{"namespace":"openshift-cluster-samples-operator","severity":"warning"},"annotations":{"description":"Samples operator is detecting problems with imagestream image imports, and the periodic retries of those\nimports are not occurring.  Contact support.  You can look at the \"openshift-samples\" ClusterOperator object\nfor details. Most likely there are issues with the external image registry hosting the images that need to\nbe investigated.  The list of ImageStreams that have failing imports are:\n{{ range query \"openshift_samples_failed_imagestream_import_info > 0\" }}\n  {{ .Labels.name }}\n{{ end }}\nHowever, the list of ImageStreams for which samples operator is retrying imports is:\nretrying imports:\n{{ range query \"openshift_samples_retry_imagestream_import_total > 0\" }}\n   {{ .Labels.imagestreamname }}\n{{ end }}\n","summary":"Samples operator is having problems with imagestream imports and its retries."},"alerts":[],"health":"ok","evaluationTime":0.000382827,"lastEvaluation":"2026-04-23T09:27:39.429877609Z","type":"alerting"},{"state":"inactive","name":"SamplesImagestreamImportFailing","query":"sum(openshift_samples_retry_imagestream_import_total) - sum(openshift_samples_retry_imagestream_import_total offset 30m) \u003e sum(openshift_samples_failed_imagestream_import_info)","duration":7200,"keepFiringFor":0,"labels":{"namespace":"openshift-cluster-samples-operator","severity":"warning"},"annotations":{"description":"Samples operator is detecting problems with imagestream image imports.  You can look at the \"openshift-samples\"\nClusterOperator object for details. Most likely there are issues with the external image registry hosting\nthe images that needs to be investigated.  Or you can consider marking samples operator Removed if you do not\ncare about having sample imagestreams available.  The list of ImageStreams for which samples operator is\nretrying imports:\n{{ range query \"openshift_samples_retry_imagestream_import_total > 0\" }}\n   {{ .Labels.imagestreamname }}\n{{ end }}\n","summary":"Samples operator is detecting problems with imagestream image imports"},"alerts":[],"health":"ok","evaluationTime":0.00019878,"lastEvaluation":"2026-04-23T09:27:39.430265895Z","type":"alerting"},{"state":"inactive","name":"SamplesDegraded","query":"openshift_samples_degraded_info == 1","duration":7200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Samples could not be deployed and the operator is degraded. Review the \"openshift-samples\" ClusterOperator object for further details.\n","summary":"Samples operator is degraded."},"alerts":[],"health":"ok","evaluationTime":0.000051802,"lastEvaluation":"2026-04-23T09:27:39.430467873Z","type":"alerting"},{"state":"inactive","name":"SamplesInvalidConfig","query":"openshift_samples_invalidconfig_info == 1","duration":7200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Samples operator has been given an invalid configuration.\n","summary":"Samples operator Invalid configuration"},"alerts":[],"health":"ok","evaluationTime":0.000039802,"lastEvaluation":"2026-04-23T09:27:39.430521931Z","type":"alerting"},{"state":"inactive","name":"SamplesMissingSecret","query":"openshift_samples_invalidsecret_info{reason=\"missing_secret\"} == 1","duration":7200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Samples operator cannot find the samples pull secret in the openshift namespace.\n","summary":"Samples operator is not able to find secret"},"alerts":[],"health":"ok","evaluationTime":0.000045987,"lastEvaluation":"2026-04-23T09:27:39.430563951Z","type":"alerting"},{"state":"inactive","name":"SamplesMissingTBRCredential","query":"openshift_samples_invalidsecret_info{reason=\"missing_tbr_credential\"} == 1","duration":7200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The samples operator cannot find credentials for 'registry.redhat.io'. Many of the sample ImageStreams will fail to import unless the 'samplesRegistry' in the operator configuration is changed.\n","summary":"Samples operator is not able to find the credentials for registry"},"alerts":[],"health":"ok","evaluationTime":0.000041469,"lastEvaluation":"2026-04-23T09:27:39.430612775Z","type":"alerting"},{"state":"inactive","name":"SamplesTBRInaccessibleOnBoot","query":"openshift_samples_tbr_inaccessible_info == 1","duration":172800,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"One of two situations has occurred.  Either\nsamples operator could not access 'registry.redhat.io' during its initial installation and it bootstrapped as removed.\nIf this is expected, and stems from installing in a restricted network environment, please note that if you\nplan on mirroring images associated with sample imagestreams into a registry available in your restricted\nnetwork environment, and subsequently moving samples operator back to 'Managed' state, a list of the images\nassociated with each image stream tag from the samples catalog is\nprovided in the 'imagestreamtag-to-image' config map in the 'openshift-cluster-samples-operator' namespace to\nassist the mirroring process.\nOr, the use of allowed registries or blocked registries with global imagestream configuration will not allow\nsamples operator to create imagestreams using the default image registry 'registry.redhat.io'.\n","summary":"Samples operator is not able to access the registry on boot"},"alerts":[],"health":"ok","evaluationTime":0.000037935,"lastEvaluation":"2026-04-23T09:27:39.430656442Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000847545,"lastEvaluation":"2026-04-23T09:27:39.429848952Z"},{"name":"default-storage-classes.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-storage-operator-prometheus-81220ddb-f47d-4139-81a8-e756c3d42010.yaml","rules":[{"state":"inactive","name":"MultipleDefaultStorageClasses","query":"min_over_time(default_storage_class_count[5m]) \u003e 1","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Cluster storage operator monitors all storage classes configured in the cluster\nand checks there is not more than one default StorageClass configured.\n","message":"StorageClass count check is failing (there should not be more than one default StorageClass)","summary":"More than one default StorageClass detected."},"alerts":[],"health":"ok","evaluationTime":0.000198931,"lastEvaluation":"2026-04-23T09:27:42.014075762Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000223689,"lastEvaluation":"2026-04-23T09:27:42.014054006Z"},{"name":"kubernetes-storage","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-storage-operator-prometheus-81220ddb-f47d-4139-81a8-e756c3d42010.yaml","rules":[{"state":"inactive","name":"KubePersistentVolumeFillingUp","query":"(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) \u003c 0.03 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_alerts_k8s_io_kube_persistent_volume_filling_up=\"disabled\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1","duration":60,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md","summary":"PersistentVolume is filling up."},"alerts":[],"health":"ok","evaluationTime":0.000593596,"lastEvaluation":"2026-04-23T09:27:44.351729058Z","type":"alerting"},{"state":"inactive","name":"KubePersistentVolumeFillingUp","query":"(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) \u003c 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[6h], 4 * 24 * 3600) \u003c 0 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_alerts_k8s_io_kube_persistent_volume_filling_up=\"disabled\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md","summary":"PersistentVolume is filling up."},"alerts":[],"health":"ok","evaluationTime":0.000503042,"lastEvaluation":"2026-04-23T09:27:44.352329667Z","type":"alerting"},{"state":"inactive","name":"KubePersistentVolumeInodesFillingUp","query":"(kubelet_volume_stats_inodes_free{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} / kubelet_volume_stats_inodes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) \u003c 0.03 and kubelet_volume_stats_inodes_used{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_alerts_k8s_io_kube_persistent_volume_filling_up=\"disabled\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1","duration":60,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md","summary":"PersistentVolumeInodes are filling up."},"alerts":[],"health":"ok","evaluationTime":0.000344517,"lastEvaluation":"2026-04-23T09:27:44.352836915Z","type":"alerting"},{"state":"inactive","name":"KubePersistentVolumeInodesFillingUp","query":"(kubelet_volume_stats_inodes_free{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} / kubelet_volume_stats_inodes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) \u003c 0.15 and kubelet_volume_stats_inodes_used{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0 and predict_linear(kubelet_volume_stats_inodes_free{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[6h], 4 * 24 * 3600) \u003c 0 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1 unless on (cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_alerts_k8s_io_kube_persistent_volume_filling_up=\"disabled\",namespace=~\"(openshift-.*|kube-.*|default)\"} == 1","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md","summary":"PersistentVolumeInodes are filling up."},"alerts":[],"health":"ok","evaluationTime":0.000429598,"lastEvaluation":"2026-04-23T09:27:44.353185672Z","type":"alerting"},{"state":"inactive","name":"KubePersistentVolumeErrors","query":"kube_persistentvolume_status_phase{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",phase=~\"Failed|Pending\"} \u003e 0","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.","summary":"PersistentVolume is having issues with provisioning."},"alerts":[],"health":"ok","evaluationTime":0.000093764,"lastEvaluation":"2026-04-23T09:27:44.353619377Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.002023683,"lastEvaluation":"2026-04-23T09:27:44.351691941Z"},{"name":"storage-operations.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-storage-operator-prometheus-81220ddb-f47d-4139-81a8-e756c3d42010.yaml","rules":[{"state":"inactive","name":"PodStartupStorageOperationsFailing","query":"increase(storage_operation_duration_seconds_count{operation_name=~\"volume_attach|volume_mount\",status!=\"success\"}[5m]) \u003e 0 and ignoring (status) (sum without (status) (increase(storage_operation_duration_seconds_count{operation_name=~\"volume_attach|volume_mount\",status=\"success\"}[5m]) or increase(storage_operation_duration_seconds_count{operation_name=~\"volume_attach|volume_mount\",status!=\"success\"}[5m]) * 0)) == 0","duration":300,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"Failing storage operation \"{{ $labels.operation_name }}\" of volume plugin {{ $labels.volume_plugin }} was preventing Pods{{ if $labels.node }} on node {{ $labels.node }}{{ end }}\nfrom starting for past 5 minutes.\n{{ if eq $labels.operation_name \"volume_mount\" -}}\nPlease investigate Pods that are \"ContainerCreating\" on the node: \"oc get pod --field-selector=spec.nodeName={{ $labels.node }} --all-namespaces | grep ContainerCreating\".\n{{- else if eq $labels.operation_name \"volume_attach\" -}}\nPlease investigate Pods that are \"ContainerCreating\" across all nodes: \"oc get pod --all-namespaces | grep ContainerCreating\".\nCheck volume attachment status: \"oc get volumeattachment\" and controller manager logs.\n{{- else -}}\nPlease investigate Pods that are \"ContainerCreating\"{{ if $labels.node }} on node {{ $labels.node }}{{ end }}.\n{{- end }}\nEvents of the Pods should contain exact error message: \"oc describe pod -n <pod namespace> <pod name>\".\n","summary":"Pods can't start because {{ $labels.operation_name }} of volume plugin {{ $labels.volume_plugin }} is permanently failing{{ if $labels.node }} on node {{ $labels.node }}{{ end }}."},"alerts":[],"health":"ok","evaluationTime":0.000500625,"lastEvaluation":"2026-04-23T09:27:33.684331031Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000516709,"lastEvaluation":"2026-04-23T09:27:33.684317458Z"},{"name":"storage-selinux.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-storage-operator-prometheus-81220ddb-f47d-4139-81a8-e756c3d42010.yaml","rules":[{"name":"cluster:volume_manager_selinux_pod_context_mismatch_total","query":"sum(volume_manager_selinux_pod_context_mismatch_warnings_total) + sum(volume_manager_selinux_pod_context_mismatch_errors_total)","health":"ok","evaluationTime":0.000288443,"lastEvaluation":"2026-04-23T09:27:59.572296184Z","type":"recording"},{"name":"cluster:volume_manager_selinux_volume_context_mismatch_warnings_total","query":"sum by (volume_plugin) (volume_manager_selinux_volume_context_mismatch_warnings_total{volume_plugin!~\".*-e2e-.*\"})","health":"ok","evaluationTime":0.000149202,"lastEvaluation":"2026-04-23T09:27:59.572592707Z","type":"recording"},{"name":"cluster:volume_manager_selinux_volume_context_mismatch_errors_total","query":"sum by (volume_plugin) (volume_manager_selinux_volume_context_mismatch_errors_total{volume_plugin!~\".*-e2e-.*\"})","health":"ok","evaluationTime":0.000090097,"lastEvaluation":"2026-04-23T09:27:59.572749709Z","type":"recording"},{"name":"cluster:volume_manager_selinux_volumes_admitted_total","query":"sum by (volume_plugin) (volume_manager_selinux_volumes_admitted_total{volume_plugin!~\".*-e2e-.*\"})","health":"ok","evaluationTime":0.000087687,"lastEvaluation":"2026-04-23T09:27:59.572845901Z","type":"recording"},{"name":"cluster:selinux_warning_controller_selinux_volume_conflict:count","query":"sum by (property) (selinux_warning_controller_selinux_volume_conflict{property=~\"SELinuxLabel|SELinuxChangePolicy\"})","health":"ok","evaluationTime":0.000091163,"lastEvaluation":"2026-04-23T09:27:59.572939169Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000764127,"lastEvaluation":"2026-04-23T09:27:59.572269092Z"},{"name":"openshift/console-operator","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-console-operator-cluster-monitoring-prometheus-rules-c60c0903-cdb1-4e16-84b5-d93be478fd0e.yaml","rules":[{"name":"cluster:console_auth_login_requests_total:sum","query":"sum(console_auth_login_requests_total)","health":"ok","evaluationTime":0.000184746,"lastEvaluation":"2026-04-23T09:27:36.442363515Z","type":"recording"},{"name":"cluster:console_auth_login_successes_total:sum","query":"sum by (role) (console_auth_login_successes_total)","health":"ok","evaluationTime":0.000097024,"lastEvaluation":"2026-04-23T09:27:36.442553261Z","type":"recording"},{"name":"cluster:console_auth_login_failures_total:sum","query":"sum by (reason) (console_auth_login_failures_total)","health":"ok","evaluationTime":0.00005457,"lastEvaluation":"2026-04-23T09:27:36.442653243Z","type":"recording"},{"name":"cluster:console_auth_logout_requests_total:sum","query":"sum by (reason) (console_auth_logout_requests_total)","health":"ok","evaluationTime":0.000049102,"lastEvaluation":"2026-04-23T09:27:36.442710303Z","type":"recording"},{"name":"cluster:console_usage_users:max","query":"max by (role) (console_usage_users)","health":"ok","evaluationTime":0.000045432,"lastEvaluation":"2026-04-23T09:27:36.442761872Z","type":"recording"},{"name":"cluster:console_plugins_info:max","query":"max by (name, state) (console_plugins_info)","health":"ok","evaluationTime":0.000053854,"lastEvaluation":"2026-04-23T09:27:36.442809448Z","type":"recording"},{"name":"cluster:console_customization_perspectives_info:max","query":"max by (name, state) (console_customization_perspectives_info)","health":"ok","evaluationTime":0.000044068,"lastEvaluation":"2026-04-23T09:27:36.442865797Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000589945,"lastEvaluation":"2026-04-23T09:27:36.442322461Z"},{"name":"openshift-dns.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-dns-operator-dns-43d3d869-9cd4-4f76-8beb-cefcd1d9056d.yaml","rules":[{"state":"inactive","name":"CoreDNSPanicking","query":"increase(coredns_panics_total[10m]) \u003e 0","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value }} CoreDNS panics observed on {{ $labels.instance }}","summary":"CoreDNS panic"},"alerts":[],"health":"ok","evaluationTime":0.000274652,"lastEvaluation":"2026-04-23T09:27:49.978259489Z","type":"alerting"},{"state":"inactive","name":"CoreDNSHealthCheckSlow","query":"histogram_quantile(0.95, sum by (instance, le) (rate(coredns_health_request_duration_seconds_bucket[5m]))) \u003e 10","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"CoreDNS Health Checks are slowing down (instance {{ $labels.instance }})","summary":"CoreDNS health checks"},"alerts":[],"health":"ok","evaluationTime":0.000236329,"lastEvaluation":"2026-04-23T09:27:49.978541498Z","type":"alerting"},{"state":"inactive","name":"CoreDNSErrorsHigh","query":"(sum by (namespace) (rate(coredns_dns_responses_total{rcode=\"SERVFAIL\"}[5m])) / sum by (namespace) (rate(coredns_dns_responses_total[5m]))) \u003e 0.01","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-dns-operator/CoreDNSErrorsHigh.md","summary":"CoreDNS serverfail"},"alerts":[],"health":"ok","evaluationTime":0.000144691,"lastEvaluation":"2026-04-23T09:27:49.978781211Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000691723,"lastEvaluation":"2026-04-23T09:27:49.978236486Z"},{"name":"pvc-problem-detector.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-image-registry-image-registry-operator-alerts-5c4d4108-41dc-4d63-ac37-f26f7d61992c.yaml","rules":[{"state":"inactive","name":"ImageRegistryStorageReadOnly","query":"sum without (instance, pod, operation) (rate(imageregistry_storage_errors_total{code=\"READ_ONLY_FILESYSTEM\"}[5m])) \u003e 0","duration":600,"keepFiringFor":0,"labels":{"kubernetes_operator_part_of":"image-registry","severity":"warning"},"annotations":{"description":"The image registry storage is read-only. Read-only storage affects direct pushes to the image registry, and pull-through proxy caching. In the case of pull-through proxy caching, read-only storage is particularly important because without it the image registry won't be actually caching anything. Please verify your backing storage solution and make sure the volume mounted on the image-registry pods is writable to avoid potential outages.","message":"The image registry storage is read-only and no images will be committed to storage.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-image-registry-operator/ImageRegistryStorageReadOnly.md","summary":"The image registry storage is read-only and no images will be committed to storage."},"alerts":[],"health":"ok","evaluationTime":0.000321877,"lastEvaluation":"2026-04-23T09:27:59.192696317Z","type":"alerting"},{"state":"inactive","name":"ImageRegistryStorageFull","query":"sum without (instance, pod, operation) (rate(imageregistry_storage_errors_total{code=\"DEVICE_OUT_OF_SPACE\"}[5m])) \u003e 0","duration":600,"keepFiringFor":0,"labels":{"kubernetes_operator_part_of":"image-registry","severity":"warning"},"annotations":{"description":"The image registry storage disk is full. A full disk affects direct pushes to the image registry, and pull-through proxy caching. In the case of pull-through proxy caching, disk space is particularly important because without it the image registry won't be actually caching anything. Please verify your backing storage solution and make sure the volume mounted on the image-registry pods have enough free disk space to avoid potential outages.","message":"The image registry storage disk is full and no images will be committed to storage.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-image-registry-operator/ImageRegistryStorageFull.md","summary":"The image registry storage disk is full and no images will be committed to storage."},"alerts":[],"health":"ok","evaluationTime":0.000107355,"lastEvaluation":"2026-04-23T09:27:59.193026806Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000488794,"lastEvaluation":"2026-04-23T09:27:59.192648645Z"},{"name":"imageregistry.operations.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-image-registry-image-registry-rules-9c436d72-b2bf-4d2d-80c1-3f91ed759c6f.yaml","rules":[{"name":"imageregistry:operations_count:sum","query":"label_replace(label_replace(sum by (operation) (imageregistry_request_duration_seconds_count{operation=\"BlobStore.ServeBlob\"}), \"operation\", \"get\", \"operation\", \"(.+)\"), \"resource_type\", \"blob\", \"resource_type\", \"\")","health":"ok","evaluationTime":0.000223953,"lastEvaluation":"2026-04-23T09:27:50.988814997Z","type":"recording"},{"name":"imageregistry:operations_count:sum","query":"label_replace(label_replace(sum by (operation) (imageregistry_request_duration_seconds_count{operation=\"BlobStore.Create\"}), \"operation\", \"create\", \"operation\", \"(.+)\"), \"resource_type\", \"blob\", \"resource_type\", \"\")","health":"ok","evaluationTime":0.000106408,"lastEvaluation":"2026-04-23T09:27:50.989043455Z","type":"recording"},{"name":"imageregistry:operations_count:sum","query":"label_replace(label_replace(sum by (operation) (imageregistry_request_duration_seconds_count{operation=\"ManifestService.Get\"}), \"operation\", \"get\", \"operation\", \"(.+)\"), \"resource_type\", \"manifest\", \"resource_type\", \"\")","health":"ok","evaluationTime":0.000097999,"lastEvaluation":"2026-04-23T09:27:50.989152296Z","type":"recording"},{"name":"imageregistry:operations_count:sum","query":"label_replace(label_replace(sum by (operation) (imageregistry_request_duration_seconds_count{operation=\"ManifestService.Put\"}), \"operation\", \"create\", \"operation\", \"(.+)\"), \"resource_type\", \"manifest\", \"resource_type\", \"\")","health":"ok","evaluationTime":0.000119852,"lastEvaluation":"2026-04-23T09:27:50.989255195Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000589566,"lastEvaluation":"2026-04-23T09:27:50.988787241Z"},{"name":"imagestreams.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-image-registry-imagestreams-rules-3d4a6ecb-594f-4058-93ab-a43c4dbb80d8.yaml","rules":[{"name":"imageregistry:imagestreamtags_count:sum","query":"sum by (location, source) (image_registry_image_stream_tags_total)","health":"ok","evaluationTime":0.00013502,"lastEvaluation":"2026-04-23T09:27:33.641174683Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000160571,"lastEvaluation":"2026-04-23T09:27:33.641151681Z"},{"name":"openshift-ingress-to-route-controller.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-ingress-operator-ingress-operator-f2fb9a36-4cf6-4595-b78e-b2d9cd67c93a.yaml","rules":[{"state":"inactive","name":"IngressWithoutClassName","query":"openshift_ingress_to_route_controller_ingress_without_class_name == 1","duration":86400,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"This alert fires when there is an Ingress with an unset IngressClassName for longer than one day.","message":"Ingress {{ $labels.namespace }}/{{ $labels.name }} is missing the IngressClassName for 1 day.","summary":"Ingress without IngressClassName for 1 day"},"alerts":[],"health":"ok","evaluationTime":0.000166747,"lastEvaluation":"2026-04-23T09:27:51.524380469Z","type":"alerting"},{"state":"inactive","name":"UnmanagedRoutes","query":"openshift_ingress_to_route_controller_route_with_unmanaged_owner == 1","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"This alert fires when there is a Route owned by an unmanaged Ingress.","message":"Route {{ $labels.namespace }}/{{ $labels.name }} is owned by an unmanaged Ingress.","summary":"Route owned by an Ingress no longer managed"},"alerts":[],"health":"ok","evaluationTime":0.000048726,"lastEvaluation":"2026-04-23T09:27:51.524551126Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000241754,"lastEvaluation":"2026-04-23T09:27:51.524360242Z"},{"name":"openshift-ingress.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-ingress-operator-ingress-operator-f2fb9a36-4cf6-4595-b78e-b2d9cd67c93a.yaml","rules":[{"state":"inactive","name":"HAProxyReloadFail","query":"template_router_reload_failure == 1","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"This alert fires when HAProxy fails to reload its configuration, which will result in the router not picking up recently created or modified routes.","message":"HAProxy reloads are failing on {{ $labels.pod }}. Router is not respecting recently created or modified routes","summary":"HAProxy reload failure"},"alerts":[],"health":"ok","evaluationTime":0.000183389,"lastEvaluation":"2026-04-23T09:27:59.255416547Z","type":"alerting"},{"state":"inactive","name":"HAProxyDown","query":"haproxy_up == 0","duration":300,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"This alert fires when metrics report that HAProxy is down.","message":"HAProxy metrics are reporting that HAProxy is down on pod {{ $labels.namespace }} / {{ $labels.pod }}","summary":"HAProxy is down"},"alerts":[],"health":"ok","evaluationTime":0.000088637,"lastEvaluation":"2026-04-23T09:27:59.255607206Z","type":"alerting"},{"state":"inactive","name":"IngressControllerDegraded","query":"ingress_controller_conditions{condition=\"Degraded\"} == 1","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"This alert fires when the IngressController status is degraded.","message":"The {{ $labels.namespace }}/{{ $labels.name }} ingresscontroller is\ndegraded: {{ $labels.reason }}.\n","summary":"IngressController is degraded"},"alerts":[],"health":"ok","evaluationTime":0.000162299,"lastEvaluation":"2026-04-23T09:27:59.255699892Z","type":"alerting"},{"state":"inactive","name":"IngressControllerUnavailable","query":"ingress_controller_conditions{condition=\"Available\"} == 0","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"This alert fires when the IngressController is not available.","message":"The {{ $labels.namespace }}/{{ $labels.name }} ingresscontroller is\nunavailable: {{ $labels.reason }}.\n","summary":"IngressController is unavailable"},"alerts":[],"health":"ok","evaluationTime":0.000064223,"lastEvaluation":"2026-04-23T09:27:59.255866854Z","type":"alerting"},{"name":"cluster:route_metrics_controller_routes_per_shard:min","query":"min(route_metrics_controller_routes_per_shard)","health":"ok","evaluationTime":0.000098536,"lastEvaluation":"2026-04-23T09:27:59.255935117Z","type":"recording"},{"name":"cluster:route_metrics_controller_routes_per_shard:max","query":"max(route_metrics_controller_routes_per_shard)","health":"ok","evaluationTime":0.000047553,"lastEvaluation":"2026-04-23T09:27:59.256037635Z","type":"recording"},{"name":"cluster:route_metrics_controller_routes_per_shard:avg","query":"avg(route_metrics_controller_routes_per_shard)","health":"ok","evaluationTime":0.000041215,"lastEvaluation":"2026-04-23T09:27:59.25608852Z","type":"recording"},{"name":"cluster:route_metrics_controller_routes_per_shard:median","query":"quantile(0.5, route_metrics_controller_routes_per_shard)","health":"ok","evaluationTime":0.000054598,"lastEvaluation":"2026-04-23T09:27:59.256133199Z","type":"recording"},{"name":"cluster:openshift_route_info:tls_termination:sum","query":"sum by (tls_termination) (openshift_route_info)","health":"ok","evaluationTime":0.000197408,"lastEvaluation":"2026-04-23T09:27:59.256191083Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.001010899,"lastEvaluation":"2026-04-23T09:27:59.255381096Z"},{"name":"insights","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-insights-insights-prometheus-rules-536e8ce2-69da-4f4b-a739-d509db2c73bd.yaml","rules":[{"state":"inactive","name":"InsightsDisabled","query":"max without (job, pod, service, instance) (cluster_operator_conditions{condition=\"Disabled\",name=\"insights\"} == 1)","duration":300,"keepFiringFor":0,"labels":{"namespace":"openshift-insights","severity":"info"},"annotations":{"description":"Insights operator is disabled. In order to enable Insights and benefit from recommendations specific to your cluster, please follow steps listed in the documentation: https://docs.openshift.com/container-platform/latest/support/remote_health_monitoring/enabling-remote-health-reporting.html","summary":"Insights operator is disabled."},"alerts":[],"health":"ok","evaluationTime":0.000214064,"lastEvaluation":"2026-04-23T09:27:39.500087173Z","type":"alerting"},{"state":"inactive","name":"SimpleContentAccessNotAvailable","query":"max without (job, pod, service, instance) (max_over_time(cluster_operator_conditions{condition=\"SCAAvailable\",name=\"insights\",reason=\"NotFound\"}[5m]) == 0)","duration":300,"keepFiringFor":0,"labels":{"namespace":"openshift-insights","severity":"info"},"annotations":{"description":"Simple content access (SCA) is not enabled. Once enabled, Insights Operator can automatically import the SCA certificates from Red Hat OpenShift Cluster Manager making it easier to use the content provided by your Red Hat subscriptions when creating container images. See https://docs.openshift.com/container-platform/latest/cicd/builds/running-entitled-builds.html for more information.","summary":"Simple content access certificates are not available."},"alerts":[],"health":"ok","evaluationTime":0.000129811,"lastEvaluation":"2026-04-23T09:27:39.50030768Z","type":"alerting"},{"state":"inactive","name":"InsightsRecommendationActive","query":"insights_recommendation_active == 1","duration":300,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"Insights recommendation \"{{ $labels.description }}\" with total risk \"{{ $labels.total_risk }}\" was detected on the cluster. More information is available at {{ $labels.info_link }}.","summary":"An Insights recommendation is active for this cluster."},"alerts":[],"health":"ok","evaluationTime":0.000053634,"lastEvaluation":"2026-04-23T09:27:39.500440685Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000452104,"lastEvaluation":"2026-04-23T09:27:39.500044568Z"},{"name":"pre-release-lifecycle","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-api-usage-437a005a-bb7b-4089-934a-049e4f3d2588.yaml","rules":[{"state":"inactive","name":"APIRemovedInNextReleaseInUse","query":"group by (group, version, resource) (apiserver_requested_deprecated_apis{removed_release=\"1.24\"}) and (sum by (group, version, resource) (rate(apiserver_request_total{system_client!=\"cluster-policy-controller\",system_client!=\"kube-controller-manager\"}[4h]))) \u003e 0","duration":3600,"keepFiringFor":0,"labels":{"namespace":"openshift-kube-apiserver","severity":"info"},"annotations":{"description":"Deprecated API that will be removed in the next version is being used. Removing the workload that is using the {{ $labels.group }}.{{ $labels.version }}/{{ $labels.resource }} API might be necessary for a successful upgrade to the next cluster version. Refer to `oc get apirequestcounts {{ $labels.resource }}.{{ $labels.version }}.{{ $labels.group }} -o yaml` to identify the workload.","summary":"Deprecated API that will be removed in the next version is being used."},"alerts":[],"health":"ok","evaluationTime":0.007987356,"lastEvaluation":"2026-04-23T09:27:40.849157903Z","type":"alerting"},{"state":"inactive","name":"APIRemovedInNextEUSReleaseInUse","query":"group by (group, version, resource) (apiserver_requested_deprecated_apis{removed_release=~\"1\\\\.2[45]\"}) and (sum by (group, version, resource) (rate(apiserver_request_total{system_client!=\"cluster-policy-controller\",system_client!=\"kube-controller-manager\"}[4h]))) \u003e 0","duration":3600,"keepFiringFor":0,"labels":{"namespace":"openshift-kube-apiserver","severity":"info"},"annotations":{"description":"Deprecated API that will be removed in the next EUS version is being used. Removing the workload that is using the {{ $labels.group }}.{{ $labels.version }}/{{ $labels.resource }} API might be necessary for a successful upgrade to the next EUS cluster version. Refer to `oc get apirequestcounts {{ $labels.resource }}.{{ $labels.version }}.{{ $labels.group }} -o yaml` to identify the workload.","summary":"Deprecated API that will be removed in the next EUS version is being used."},"alerts":[],"health":"ok","evaluationTime":0.007524422,"lastEvaluation":"2026-04-23T09:27:40.857157174Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.015552911,"lastEvaluation":"2026-04-23T09:27:40.849132334Z"},{"name":"pod-security-violation","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-podsecurity-9edbfc5f-a837-4e1d-b61e-a29077f1d81b.yaml","rules":[{"state":"inactive","name":"PodSecurityViolation","query":"sum by (policy_level, ocp_namespace) (increase(pod_security_evaluations_total{decision=\"deny\",mode=\"audit\",ocp_namespace=\"\",resource=\"pod\"}[1d])) \u003e 0","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-kube-apiserver","severity":"info"},"annotations":{"description":"A workload (pod, deployment, daemonset, ...) was created somewhere in the cluster but it did not match the PodSecurity \"{{ $labels.policy_level }}\" profile defined by its namespace either via the cluster-wide configuration (which triggers on a \"restricted\" profile violations) or by the namespace local Pod Security labels. Refer to Kubernetes documentation on Pod Security Admission to learn more about these violations.","summary":"One or more workloads users created in the cluster don't match their Pod Security profile"},"alerts":[],"health":"ok","evaluationTime":0.000216772,"lastEvaluation":"2026-04-23T09:27:32.406301102Z","type":"alerting"},{"state":"inactive","name":"PodSecurityViolation","query":"sum by (policy_level, ocp_namespace) (increase(pod_security_evaluations_total{decision=\"deny\",mode=\"audit\",ocp_namespace!=\"\",resource=\"pod\"}[1d])) \u003e 0","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-kube-apiserver","severity":"info"},"annotations":{"description":"A workload (pod, deployment, daemonset, ...) was created in namespace \"{{ $labels.ocp_namespace }}\" but it did not match the PodSecurity \"{{ $labels.policy_level }}\" profile defined by its namespace either via the cluster-wide configuration (which triggers on a \"restricted\" profile violations) or by the namespace local Pod Security labels. Refer to Kubernetes documentation on Pod Security Admission to learn more about these violations.","summary":"One or more workloads in platform namespaces of the cluster don't match their Pod Security profile"},"alerts":[],"health":"ok","evaluationTime":0.000070645,"lastEvaluation":"2026-04-23T09:27:32.406522693Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000318838,"lastEvaluation":"2026-04-23T09:27:32.40627713Z"},{"name":"mcc-drain-error","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-controller-502161c7-f61e-4529-a7f3-5e71bd340715.yaml","rules":[{"state":"inactive","name":"MCCDrainError","query":"mcc_drain_err \u003e 0","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"Drain failed on {{ $labels.exported_node }} , updates may be blocked. For more details check MachineConfigController pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c machine-config-controller","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/MachineConfigControllerDrainError.md","summary":"Alerts the user to a failed node drain. Always triggers when the failure happens one or more times."},"alerts":[],"health":"ok","evaluationTime":0.000174013,"lastEvaluation":"2026-04-23T09:27:41.288932359Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000202965,"lastEvaluation":"2026-04-23T09:27:41.28890576Z"},{"name":"mcc-pool-alert","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-controller-502161c7-f61e-4529-a7f3-5e71bd340715.yaml","rules":[{"state":"inactive","name":"MCCPoolAlert","query":"mcc_pool_alert \u003e 0","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"Node {{ $labels.exported_node }} has triggered a pool alert due to a label change. For more details check MachineConfigController pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c machine-config-controller","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/MachineConfigControllerPoolAlert.md","summary":"Triggers when nodes in a pool have overlapping labels such as master, worker, and a custom label therefore a choice must be made as to which is honored."},"alerts":[],"health":"ok","evaluationTime":0.000165195,"lastEvaluation":"2026-04-23T09:27:53.708138065Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000193293,"lastEvaluation":"2026-04-23T09:27:53.708112564Z"},{"name":"os-image-override.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-controller-502161c7-f61e-4529-a7f3-5e71bd340715.yaml","rules":[{"name":"os_image_url_override:sum","query":"sum(os_image_url_override)","health":"ok","evaluationTime":0.000141269,"lastEvaluation":"2026-04-23T09:27:50.950645401Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000167168,"lastEvaluation":"2026-04-23T09:27:50.950622171Z"},{"name":"extremely-high-individual-control-plane-memory","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"ExtremelyHighIndividualControlPlaneMemory","query":"(1 - sum by (instance) (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes and on (instance) label_replace(kube_node_role{role=\"master\"}, \"instance\", \"$1\", \"node\", \"(.+)\")) / sum by (instance) (node_memory_MemTotal_bytes and on (instance) label_replace(kube_node_role{role=\"master\"}, \"instance\", \"$1\", \"node\", \"(.+)\"))) * 100 \u003e 90","duration":2700,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"critical"},"annotations":{"description":"The memory utilization per instance within control plane nodes influence the stability, and responsiveness of the cluster. This can lead to cluster instability and slow responses from kube-apiserver or failing requests especially on etcd. Moreover, OOM kill is expected which negatively influences the pod scheduling. If this happens on container level, the descheduler will not be able to detect it, as it works on the pod level. To fix this, increase memory of the affected node of control plane nodes.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/ExtremelyHighIndividualControlPlaneMemory.md","summary":"Extreme memory utilization per node within control plane nodes is extremely high, and could impact responsiveness and stability."},"alerts":[],"health":"ok","evaluationTime":0.00074683,"lastEvaluation":"2026-04-23T09:27:59.353434107Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.0007884,"lastEvaluation":"2026-04-23T09:27:59.353396103Z"},{"name":"high-overall-control-plane-memory","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"HighOverallControlPlaneMemory","query":"(1 - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes and on (instance) label_replace(kube_node_role{role=\"master\"}, \"instance\", \"$1\", \"node\", \"(.+)\")) / sum(node_memory_MemTotal_bytes and on (instance) label_replace(kube_node_role{role=\"master\"}, \"instance\", \"$1\", \"node\", \"(.+)\"))) * 100 \u003e 60","duration":3600,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"Given three control plane nodes, the overall memory utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the kube-apiserver and etcd may be slow to respond. To fix this, increase memory of the control plane nodes.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/HighOverallControlPlaneMemory.md","summary":"Memory utilization across all control plane nodes is high, and could impact responsiveness and stability."},"alerts":[],"health":"ok","evaluationTime":0.000563752,"lastEvaluation":"2026-04-23T09:27:31.411241754Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000610619,"lastEvaluation":"2026-04-23T09:27:31.41119792Z"},{"name":"mcd-kubelet-health-state-error","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"KubeletHealthState","query":"mcd_kubelet_state \u003e 2","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"Kubelet health failure threshold reached","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/KubeletHealthState.md","summary":"This keeps track of Kubelet health failures, and tallies them. The warning is triggered if 2 or more failures occur."},"alerts":[],"health":"ok","evaluationTime":0.000152467,"lastEvaluation":"2026-04-23T09:27:32.009323698Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000176421,"lastEvaluation":"2026-04-23T09:27:32.009302891Z"},{"name":"mcd-missing-mc","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"MissingMachineConfig","query":"mcd_missing_mc \u003e 0","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"Could not find config {{ $labels.mc }} in-cluster, this likely indicates the MachineConfigs in-cluster has changed during the install process.  If you are seeing this when installing the cluster, please compare the in-cluster rendered machineconfigs to /etc/mcs-machine-config-content.json","summary":"This keeps track of Machine Config failures. Specifically a common failure on install when a rendered Machine Config is missing. Triggered when this error happens once."},"alerts":[],"health":"ok","evaluationTime":0.000179583,"lastEvaluation":"2026-04-23T09:27:42.882788326Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.00021663,"lastEvaluation":"2026-04-23T09:27:42.882753665Z"},{"name":"mcd-pivot-error","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"MCDPivotError","query":"mcd_pivot_errors_total \u003e 0","duration":120,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"Error detected in pivot logs on {{ $labels.node }} , upgrade may be blocked. For more details:  oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/MachineConfigDaemonPivotError.md","summary":"Alerts the user when an error is detected upon pivot. This triggers if the pivot errors are above zero for 2 minutes."},"alerts":[],"health":"ok","evaluationTime":0.000202018,"lastEvaluation":"2026-04-23T09:27:52.750449013Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000234061,"lastEvaluation":"2026-04-23T09:27:52.750419986Z"},{"name":"mcd-reboot-error","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"MCDRebootError","query":"mcd_reboots_failed_total \u003e 0","duration":300,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"critical"},"annotations":{"description":"Reboot failed on {{ $labels.node }} , update may be blocked. For more details:  oc logs -f -n {{ $labels.namespace }} {{ $labels.pod }} -c machine-config-daemon ","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/MachineConfigDaemonRebootError.md","summary":"Alerts the user that a node failed to reboot one or more times over a span of 5 minutes."},"alerts":[],"health":"ok","evaluationTime":0.000193554,"lastEvaluation":"2026-04-23T09:27:47.583264483Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000218249,"lastEvaluation":"2026-04-23T09:27:47.583242497Z"},{"name":"system-memory-exceeds-reservation","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"state":"inactive","name":"SystemMemoryExceedsReservation","query":"sum by (node) (container_memory_rss{id=\"/system.slice\"}) \u003e ((sum by (node) (kube_node_status_capacity{resource=\"memory\"} - kube_node_status_allocatable{resource=\"memory\"})) * 0.95)","duration":900,"keepFiringFor":0,"labels":{"namespace":"openshift-machine-config-operator","severity":"warning"},"annotations":{"description":"System memory usage of {{ $value | humanize }} on {{ $labels.node }} exceeds 95% of the reservation. Reserved memory ensures system processes can function even when the node is fully allocated and protects against workload out of memory events impacting the proper functioning of the node. The default reservation is expected to be sufficient for most configurations and should be increased (https://docs.openshift.com/container-platform/latest/nodes/nodes/nodes-nodes-managing.html) when running nodes with high numbers of pods (either due to rate of change or at steady state).","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/SystemMemoryExceedsReservation.md","summary":"Alerts the user when, for 15 minutes, a specific node is using more memory than is reserved"},"alerts":[],"health":"ok","evaluationTime":0.000298086,"lastEvaluation":"2026-04-23T09:27:37.578853372Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000322478,"lastEvaluation":"2026-04-23T09:27:37.578831564Z"},{"name":"telemetry.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-d5cd6dad-004a-4f43-ad4a-660204ca7a8a.yaml","rules":[{"name":"cluster:mcd_nodes_with_unsupported_packages:count","query":"count(mcd_local_unsupported_packages \u003e 0)","health":"ok","evaluationTime":0.000187837,"lastEvaluation":"2026-04-23T09:27:38.625552183Z","type":"recording"},{"name":"cluster:mcd_total_unsupported_packages:sum","query":"sum(mcd_local_unsupported_packages)","health":"ok","evaluationTime":0.000053336,"lastEvaluation":"2026-04-23T09:27:38.625745629Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000269418,"lastEvaluation":"2026-04-23T09:27:38.625531924Z"},{"name":"operator.marketplace.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-marketplace-marketplace-alert-rules-3ecf2238-2400-4d09-aecb-aa7effc65594.yaml","rules":[{"state":"inactive","name":"OperatorHubSourceError","query":"catalogsource_ready{exported_namespace=\"openshift-marketplace\"} == 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Operators shipped via the {{ $labels.name }} source are not available for installation until the issue is fixed. Operators already installed from this source will not receive updates until issue is fixed. Inspect the status of the pod owned by {{ $labels.name }} source in the openshift-marketplace namespace (oc -n openshift-marketplace get pods -l olm.catalogSource={{ $labels.name }}) to diagnose and repair.","summary":"The {{ $labels.name }} source is in non-ready state for more than 10 minutes."},"alerts":[],"health":"ok","evaluationTime":0.000196679,"lastEvaluation":"2026-04-23T09:27:46.589743935Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000222032,"lastEvaluation":"2026-04-23T09:27:46.589721166Z"},{"name":"alertmanager.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-alertmanager-main-rules-a132720e-eb3c-4b9f-960d-007c301572e3.yaml","rules":[{"state":"inactive","name":"AlertmanagerFailedReload","query":"max_over_time(alertmanager_config_last_reload_successful{job=~\"alertmanager-main|alertmanager-user-workload\"}[5m]) == 0","duration":600,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/AlertmanagerFailedReload.md","summary":"Reloading an Alertmanager configuration has failed."},"alerts":[],"health":"ok","evaluationTime":0.000112016,"lastEvaluation":"2026-04-23T09:27:33.688494012Z","type":"alerting"},{"state":"inactive","name":"AlertmanagerMembersInconsistent","query":"max_over_time(alertmanager_cluster_members{job=~\"alertmanager-main|alertmanager-user-workload\"}[5m]) \u003c on (namespace, service) group_left () count by (namespace, service) (max_over_time(alertmanager_cluster_members{job=~\"alertmanager-main|alertmanager-user-workload\"}[5m]))","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster.","summary":"A member of an Alertmanager cluster has not found all other cluster members."},"alerts":[],"health":"ok","evaluationTime":0.000098755,"lastEvaluation":"2026-04-23T09:27:33.688611639Z","type":"alerting"},{"state":"inactive","name":"AlertmanagerFailedToSendAlerts","query":"(rate(alertmanager_notifications_failed_total{job=~\"alertmanager-main|alertmanager-user-workload\"}[15m]) / ignoring (reason) group_left () rate(alertmanager_notifications_total{job=~\"alertmanager-main|alertmanager-user-workload\"}[15m])) \u003e 0.01","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/AlertmanagerFailedToSendAlerts.md","summary":"An Alertmanager instance failed to send notifications."},"alerts":[],"health":"ok","evaluationTime":0.00054409,"lastEvaluation":"2026-04-23T09:27:33.688713117Z","type":"alerting"},{"state":"inactive","name":"AlertmanagerClusterFailedToSendAlerts","query":"min by (namespace, service, integration) (rate(alertmanager_notifications_failed_total{integration=~\".*\",job=~\"alertmanager-main|alertmanager-user-workload\"}[15m]) / ignoring (reason) group_left () rate(alertmanager_notifications_total{integration=~\".*\",job=~\"alertmanager-main|alertmanager-user-workload\"}[15m]) \u003e 0) \u003e 0.01","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/AlertmanagerClusterFailedToSendAlerts.md","summary":"All Alertmanager instances in a cluster failed to send notifications to a critical integration."},"alerts":[],"health":"ok","evaluationTime":0.000614205,"lastEvaluation":"2026-04-23T09:27:33.689261037Z","type":"alerting"},{"state":"inactive","name":"AlertmanagerConfigInconsistent","query":"count by (namespace, service) (count_values by (namespace, service) (\"config_hash\", alertmanager_config_hash{job=~\"alertmanager-main|alertmanager-user-workload\"})) != 1","duration":1200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Alertmanager instances within the {{$labels.job}} cluster have different configurations.","summary":"Alertmanager instances within the same cluster have different configurations."},"alerts":[],"health":"ok","evaluationTime":0.000093035,"lastEvaluation":"2026-04-23T09:27:33.689879371Z","type":"alerting"},{"state":"inactive","name":"AlertmanagerClusterDown","query":"(count by (namespace, service) (avg_over_time(up{job=~\"alertmanager-main|alertmanager-user-workload\"}[5m]) \u003c 0.5) / count by (namespace, service) (up{job=~\"alertmanager-main|alertmanager-user-workload\"})) \u003e= 0.5","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.","summary":"Half or more of the Alertmanager instances within the same cluster are down."},"alerts":[],"health":"ok","evaluationTime":0.000122545,"lastEvaluation":"2026-04-23T09:27:33.689975002Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.001619129,"lastEvaluation":"2026-04-23T09:27:33.688480428Z"},{"name":"apiserver-list-watch.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"apiserver_list_watch_request_success_total:rate:sum","query":"sum by (verb) (rate(apiserver_request_total{code=~\"2..\",verb=~\"LIST|WATCH\"}[5m]))","health":"ok","evaluationTime":0.004275677,"lastEvaluation":"2026-04-23T09:27:35.931146355Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.004298194,"lastEvaluation":"2026-04-23T09:27:35.93112732Z"},{"name":"general.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"state":"firing","name":"Watchdog","query":"vector(1)","duration":0,"keepFiringFor":0,"labels":{"namespace":"openshift-monitoring","severity":"none"},"annotations":{"description":"This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n","summary":"An alert that should always be firing to certify that Alertmanager is working properly."},"alerts":[{"labels":{"alertname":"Watchdog","namespace":"openshift-monitoring","severity":"none"},"annotations":{"description":"This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n","summary":"An alert that should always be firing to certify that Alertmanager is working properly."},"state":"firing","activeAt":"2026-04-23T08:51:09.704427129Z","value":"1e+00"}],"health":"ok","evaluationTime":0.000327572,"lastEvaluation":"2026-04-23T09:27:39.706412546Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000366501,"lastEvaluation":"2026-04-23T09:27:39.706377141Z"},{"name":"kube-prometheus-general.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"count:up1","query":"count without (instance, pod, node) (up == 1)","health":"ok","evaluationTime":0.000572656,"lastEvaluation":"2026-04-23T09:27:33.574991948Z","type":"recording"},{"name":"count:up0","query":"count without (instance, pod, node) (up == 0)","health":"ok","evaluationTime":0.000195612,"lastEvaluation":"2026-04-23T09:27:33.575571222Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000804553,"lastEvaluation":"2026-04-23T09:27:33.574964837Z"},{"name":"kube-prometheus-node-recording.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"instance:node_cpu:rate:sum","query":"sum by (instance) (rate(node_cpu_seconds_total{mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[3m]))","health":"ok","evaluationTime":0.000844597,"lastEvaluation":"2026-04-23T09:27:45.113034052Z","type":"recording"},{"name":"instance:node_network_receive_bytes:rate:sum","query":"sum by (instance) (rate(node_network_receive_bytes_total[3m]))","health":"ok","evaluationTime":0.000148648,"lastEvaluation":"2026-04-23T09:27:45.113885627Z","type":"recording"},{"name":"instance:node_network_transmit_bytes:rate:sum","query":"sum by (instance) (rate(node_network_transmit_bytes_total[3m]))","health":"ok","evaluationTime":0.000118601,"lastEvaluation":"2026-04-23T09:27:45.114038943Z","type":"recording"},{"name":"cluster:node_cpu:sum_rate5m","query":"sum(rate(node_cpu_seconds_total{mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m]))","health":"ok","evaluationTime":0.000613319,"lastEvaluation":"2026-04-23T09:27:45.114160761Z","type":"recording"},{"name":"cluster:node_cpu:ratio","query":"cluster:node_cpu:sum_rate5m / count(sum by (instance, cpu) (node_cpu_seconds_total))","health":"ok","evaluationTime":0.000828319,"lastEvaluation":"2026-04-23T09:27:45.114778418Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.002596659,"lastEvaluation":"2026-04-23T09:27:45.113013502Z"},{"name":"kubernetes-recurring.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"cluster:usage:workload:capacity_physical_cpu_core_seconds","query":"sum_over_time(workload:capacity_physical_cpu_cores:sum[30s:1s]) + ((cluster:usage:workload:capacity_physical_cpu_core_seconds offset 25s) or (absent(cluster:usage:workload:capacity_physical_cpu_core_seconds offset 25s) * 0))","health":"ok","evaluationTime":0.000417517,"lastEvaluation":"2026-04-23T09:27:45.918192899Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000447029,"lastEvaluation":"2026-04-23T09:27:45.91816653Z"},{"name":"node-network","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"state":"inactive","name":"NodeNetworkInterfaceFlapping","query":"changes(node_network_up{device!~\"veth.+|tunbr\",job=\"node-exporter\"}[2m]) \u003e 2","duration":120,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Network interface \"{{ $labels.device }}\" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}","summary":"Network interface is often changing its status"},"alerts":[],"health":"ok","evaluationTime":0.000349319,"lastEvaluation":"2026-04-23T09:27:43.078216806Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000374688,"lastEvaluation":"2026-04-23T09:27:43.078194923Z"},{"name":"openshift-build.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"openshift:build_by_strategy:sum","query":"sum by (strategy) (openshift_build_status_phase_total)","health":"ok","evaluationTime":0.000114347,"lastEvaluation":"2026-04-23T09:27:33.458755938Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000152899,"lastEvaluation":"2026-04-23T09:27:33.458720397Z"},{"name":"openshift-etcd-telemetry.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"instance:etcd_mvcc_db_total_size_in_bytes:sum","query":"sum by (instance) (etcd_mvcc_db_total_size_in_bytes{job=\"etcd\"})","health":"ok","evaluationTime":0.000129796,"lastEvaluation":"2026-04-23T09:27:42.314853167Z","type":"recording"},{"name":"instance:etcd_disk_wal_fsync_duration_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum by (instance, le) (rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=\"etcd\"}[5m])))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000145226,"lastEvaluation":"2026-04-23T09:27:42.314987028Z","type":"recording"},{"name":"instance:etcd_network_peer_round_trip_time_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=\"etcd\"}[5m])))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000068939,"lastEvaluation":"2026-04-23T09:27:42.315135342Z","type":"recording"},{"name":"instance:etcd_mvcc_db_total_size_in_use_in_bytes:sum","query":"sum by (instance) (etcd_mvcc_db_total_size_in_use_in_bytes{job=\"etcd\"})","health":"ok","evaluationTime":0.000035944,"lastEvaluation":"2026-04-23T09:27:42.315206924Z","type":"recording"},{"name":"instance:etcd_disk_backend_commit_duration_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum by (instance, le) (rate(etcd_disk_backend_commit_duration_seconds_bucket{job=\"etcd\"}[5m])))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000068747,"lastEvaluation":"2026-04-23T09:27:42.315245407Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000489532,"lastEvaluation":"2026-04-23T09:27:42.314827039Z"},{"name":"openshift-general.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"state":"inactive","name":"TargetDown","query":"100 * ((1 - sum by (job, namespace, service) (up and on (namespace, pod) kube_pod_info) / count by (job, namespace, service) (up and on (namespace, pod) kube_pod_info)) or (count by (job, namespace, service) (up == 0) / count by (job, namespace, service) (up))) \u003e 10","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ printf \"%.4g\" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace have been unreachable for more than 15 minutes. This may be a symptom of network connectivity issues, down nodes, or failures within these components. Assess the health of the infrastructure and nodes running these targets and then contact support.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/TargetDown.md","summary":"Some targets were not reachable from the monitoring server for an extended period of time."},"alerts":[],"health":"ok","evaluationTime":0.001484087,"lastEvaluation":"2026-04-23T09:27:36.856175059Z","type":"alerting"},{"state":"inactive","name":"PrometheusPossibleNarrowSelectors","query":"increase(prometheus_narrow_selectors_count{job=~\"prometheus-k8s|prometheus-user-workload|thanos-querier|thanos-ruler\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Queries or/and relabel configs on Prometheus/Thanos {{$labels.namespace}}/{{$labels.pod}} could be too restrictive.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusPossibleNarrowSelectors.md","summary":"Some queries or/and relabel configs with selectors on the values of the \"le\" label of classic histograms or/and the \"quantile\" label of summaries\nmay not take into account that values could also be floats, they may need to be adjusted.\n"},"alerts":[],"health":"ok","evaluationTime":0.000110044,"lastEvaluation":"2026-04-23T09:27:36.85766652Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.001633309,"lastEvaluation":"2026-04-23T09:27:36.856145394Z"},{"name":"openshift-ingress.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"code:cluster:ingress_http_request_count:rate5m:sum","query":"sum by (code) (rate(haproxy_server_http_responses_total[5m]) \u003e 0)","health":"ok","evaluationTime":0.000611352,"lastEvaluation":"2026-04-23T09:27:56.979224939Z","type":"recording"},{"name":"cluster:usage:ingress_frontend_bytes_in:rate5m:sum","query":"sum(rate(haproxy_frontend_bytes_in_total[5m]))","health":"ok","evaluationTime":0.000170196,"lastEvaluation":"2026-04-23T09:27:56.979846455Z","type":"recording"},{"name":"cluster:usage:ingress_frontend_bytes_out:rate5m:sum","query":"sum(rate(haproxy_frontend_bytes_out_total[5m]))","health":"ok","evaluationTime":0.000123086,"lastEvaluation":"2026-04-23T09:27:56.980025106Z","type":"recording"},{"name":"cluster:usage:ingress_frontend_connections:sum","query":"sum(haproxy_frontend_current_sessions)","health":"ok","evaluationTime":0.000102265,"lastEvaluation":"2026-04-23T09:27:56.980154083Z","type":"recording"},{"name":"cluster:usage:workload:ingress_request_error:fraction5m","query":"sum(max without (service, endpoint, container, pod, job, namespace) (increase(haproxy_server_http_responses_total{code!~\"2xx|1xx|4xx|3xx\",exported_namespace!~\"openshift-.*\"}[5m]) \u003e 0)) / sum(max without (service, endpoint, container, pod, job, namespace) (increase(haproxy_server_http_responses_total{exported_namespace!~\"openshift-.*\"}[5m]))) or absent(__does_not_exist__) * 0","health":"ok","evaluationTime":0.000412217,"lastEvaluation":"2026-04-23T09:27:56.980262378Z","type":"recording"},{"name":"cluster:usage:workload:ingress_request_total:irate5m","query":"sum(max without (service, endpoint, container, pod, job, namespace) (irate(haproxy_server_http_responses_total{exported_namespace!~\"openshift-.*\"}[5m]))) or absent(__does_not_exist__) * 0","health":"ok","evaluationTime":0.000233153,"lastEvaluation":"2026-04-23T09:27:56.980680607Z","type":"recording"},{"name":"cluster:usage:openshift:ingress_request_error:fraction5m","query":"sum(max without (service, endpoint, container, pod, job, namespace) (increase(haproxy_server_http_responses_total{code!~\"2xx|1xx|4xx|3xx\",exported_namespace=~\"openshift-.*\"}[5m]) \u003e 0)) / sum(max without (service, endpoint, container, pod, job, namespace) (increase(haproxy_server_http_responses_total{exported_namespace=~\"openshift-.*\"}[5m]))) or absent(__does_not_exist__) * 0","health":"ok","evaluationTime":0.000777272,"lastEvaluation":"2026-04-23T09:27:56.980920468Z","type":"recording"},{"name":"cluster:usage:openshift:ingress_request_total:irate5m","query":"sum(max without (service, endpoint, container, pod, job, namespace) (irate(haproxy_server_http_responses_total{exported_namespace=~\"openshift-.*\"}[5m]))) or absent(__does_not_exist__) * 0","health":"ok","evaluationTime":0.000561953,"lastEvaluation":"2026-04-23T09:27:56.981708405Z","type":"recording"},{"name":"cluster:ingress_controller_aws_nlb_active:sum","query":"sum(ingress_controller_aws_nlb_active) or vector(0)","health":"ok","evaluationTime":0.000130551,"lastEvaluation":"2026-04-23T09:27:56.982277209Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.003227795,"lastEvaluation":"2026-04-23T09:27:56.979182521Z"},{"name":"openshift-kubernetes.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"pod:container_cpu_usage:sum","query":"sum by (pod, namespace) (rate(container_cpu_usage_seconds_total{container=\"\",pod!=\"\"}[5m]))","health":"ok","evaluationTime":0.00126721,"lastEvaluation":"2026-04-23T09:27:52.292052573Z","type":"recording"},{"name":"pod:container_fs_usage_bytes:sum","query":"sum by (pod, namespace) (container_fs_usage_bytes{pod!=\"\"})","health":"ok","evaluationTime":0.001391328,"lastEvaluation":"2026-04-23T09:27:52.293330437Z","type":"recording"},{"name":"namespace:container_memory_usage_bytes:sum","query":"sum by (namespace) (container_memory_usage_bytes{container!=\"\"})","health":"ok","evaluationTime":0.00114665,"lastEvaluation":"2026-04-23T09:27:52.294728688Z","type":"recording"},{"name":"namespace:container_cpu_usage:sum","query":"sum by (namespace) (rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\"}[5m]))","health":"ok","evaluationTime":0.000980801,"lastEvaluation":"2026-04-23T09:27:52.295881639Z","type":"recording"},{"name":"cluster:memory_usage:ratio","query":"sum by (cluster) (container_memory_usage_bytes{container=\"\",pod!=\"\"}) / sum by (cluster) (machine_memory_bytes)","health":"ok","evaluationTime":0.000748454,"lastEvaluation":"2026-04-23T09:27:52.296867689Z","type":"recording"},{"name":"cluster:container_spec_cpu_shares:ratio","query":"sum(container_spec_cpu_shares{container=\"\",pod!=\"\"}) / 1000 / sum(machine_cpu_cores)","health":"ok","evaluationTime":0.000546376,"lastEvaluation":"2026-04-23T09:27:52.297621068Z","type":"recording"},{"name":"cluster:container_cpu_usage:ratio","query":"sum(rate(container_cpu_usage_seconds_total{container=\"\",pod!=\"\"}[5m])) / sum(machine_cpu_cores)","health":"ok","evaluationTime":0.000717992,"lastEvaluation":"2026-04-23T09:27:52.29817121Z","type":"recording"},{"name":"pod_interface_network:container_network_receive_bytes:irate5m","query":"sum by (namespace, pod, interface) (irate(container_network_receive_bytes_total{pod!=\"\"}[5m])) + on (namespace, pod, interface) group_left (network_name) topk by (namespace, pod, interface) (1, pod_network_name_info)","health":"ok","evaluationTime":0.006037838,"lastEvaluation":"2026-04-23T09:27:52.298893816Z","type":"recording"},{"name":"pod_interface_network:container_network_transmit_bytes_total:irate5m","query":"sum by (namespace, pod, interface) (irate(container_network_transmit_bytes_total{pod!=\"\"}[5m])) + on (namespace, pod, interface) group_left (network_name) topk by (namespace, pod, interface) (1, pod_network_name_info)","health":"ok","evaluationTime":0.006152302,"lastEvaluation":"2026-04-23T09:27:52.304942062Z","type":"recording"},{"name":"cluster:master_nodes","query":"max without (endpoint, instance, job, pod, service) (kube_node_labels and on (node) kube_node_role{role=\"master\"})","labels":{"label_node_role_kubernetes_io":"master","label_node_role_kubernetes_io_master":"true"},"health":"ok","evaluationTime":0.000121469,"lastEvaluation":"2026-04-23T09:27:52.311105978Z","type":"recording"},{"name":"cluster:infra_nodes","query":"max without (endpoint, instance, job, pod, service) (kube_node_labels and on (node) kube_node_role{role=\"infra\"})","labels":{"label_node_role_kubernetes_io_infra":"true"},"health":"ok","evaluationTime":0.000060799,"lastEvaluation":"2026-04-23T09:27:52.311230253Z","type":"recording"},{"name":"cluster:master_infra_nodes","query":"max without (endpoint, instance, job, pod, service) (cluster:master_nodes and on (node) cluster:infra_nodes)","labels":{"label_node_role_kubernetes_io_infra":"true","label_node_role_kubernetes_io_master":"true"},"health":"ok","evaluationTime":0.000084737,"lastEvaluation":"2026-04-23T09:27:52.31129512Z","type":"recording"},{"name":"cluster:nodes_roles","query":"cluster:master_infra_nodes or on (node) cluster:master_nodes or on (node) cluster:infra_nodes or on (node) max without (endpoint, instance, job, pod, service) (kube_node_labels)","health":"ok","evaluationTime":0.00012008,"lastEvaluation":"2026-04-23T09:27:52.311382479Z","type":"recording"},{"name":"cluster:hyperthread_enabled_nodes","query":"kube_node_labels and on (node) (sum by (node, package, core) (label_replace(node_cpu_info, \"node\", \"$1\", \"instance\", \"(.*)\")) == 2)","labels":{"label_node_hyperthread_enabled":"true"},"health":"ok","evaluationTime":0.000393742,"lastEvaluation":"2026-04-23T09:27:52.311506892Z","type":"recording"},{"name":"cluster:virt_platform_nodes:sum","query":"count by (type, system_manufacturer, system_product_name, baseboard_manufacturer, baseboard_product_name) (sum by (instance, type, system_manufacturer, system_product_name, baseboard_manufacturer, baseboard_product_name) (virt_platform))","health":"ok","evaluationTime":0.000137375,"lastEvaluation":"2026-04-23T09:27:52.31191017Z","type":"recording"},{"name":"cluster:capacity_cpu_cores:sum","query":"sum by (label_beta_kubernetes_io_instance_type, label_node_role_kubernetes_io, label_kubernetes_io_arch, label_node_openshift_io_os_id) ((cluster:master_nodes * on (node) group_left () max by (node) (kube_node_status_capacity{resource=\"cpu\",unit=\"core\"})) or on (node) (label_replace(cluster:infra_nodes, \"label_node_role_kubernetes_io\", \"infra\", \"\", \"\") * on (node) group_left () max by (node) (kube_node_status_capacity{resource=\"cpu\",unit=\"core\"})) or on (node) (max without (endpoint, instance, job, pod, service) (kube_node_labels) * on (node) group_left () max by (node) (kube_node_status_capacity{resource=\"cpu\",unit=\"core\"})))","health":"ok","evaluationTime":0.000348336,"lastEvaluation":"2026-04-23T09:27:52.31205088Z","type":"recording"},{"name":"cluster:cpu_core_hyperthreading","query":"clamp_max(label_replace(sum by (instance, package, core) (node_cpu_info{core!=\"\",package!=\"\"} or label_replace(label_join(node_cpu_info{core=\"\",package=\"\"}, \"core\", \"\", \"cpu\"), \"package\", \"0\", \"package\", \"\")) \u003e 1, \"label_node_hyperthread_enabled\", \"true\", \"instance\", \"(.*)\") or on (instance, package) label_replace(sum by (instance, package, core) (label_replace(node_cpu_info{core!=\"\",package!=\"\"} or label_join(node_cpu_info{core=\"\",package=\"\"}, \"core\", \"\", \"cpu\"), \"package\", \"0\", \"package\", \"\")) \u003c= 1, \"label_node_hyperthread_enabled\", \"false\", \"instance\", \"(.*)\"), 1)","health":"ok","evaluationTime":0.00058152,"lastEvaluation":"2026-04-23T09:27:52.312403902Z","type":"recording"},{"name":"cluster:cpu_core_node_labels","query":"topk by (node) (1, cluster:nodes_roles) * on (node) group_right (label_beta_kubernetes_io_instance_type, label_node_role_kubernetes_io, label_node_openshift_io_os_id, label_kubernetes_io_arch, label_node_role_kubernetes_io_master, label_node_role_kubernetes_io_infra) label_replace(cluster:cpu_core_hyperthreading, \"node\", \"$1\", \"instance\", \"(.*)\")","health":"ok","evaluationTime":0.000237983,"lastEvaluation":"2026-04-23T09:27:52.312990237Z","type":"recording"},{"name":"cluster:capacity_cpu_cores_hyperthread_enabled:sum","query":"count by (label_beta_kubernetes_io_instance_type, label_node_hyperthread_enabled) (cluster:cpu_core_node_labels)","health":"ok","evaluationTime":0.000086862,"lastEvaluation":"2026-04-23T09:27:52.313232032Z","type":"recording"},{"name":"cluster:capacity_memory_bytes:sum","query":"sum by (label_beta_kubernetes_io_instance_type, label_node_role_kubernetes_io) ((cluster:master_nodes * on (node) group_left () max by (node) (kube_node_status_capacity{resource=\"memory\",unit=\"byte\"})) or on (node) (max without (endpoint, instance, job, pod, service) (kube_node_labels) * on (node) group_left () max by (node) (kube_node_status_capacity{resource=\"memory\",unit=\"byte\"})))","health":"ok","evaluationTime":0.000265834,"lastEvaluation":"2026-04-23T09:27:52.313321789Z","type":"recording"},{"name":"cluster:cpu_usage_cores:sum","query":"sum(1 - rate(node_cpu_seconds_total{mode=\"idle\"}[2m]) * on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{pod=~\"node-exporter.+\"})","health":"ok","evaluationTime":0.000353364,"lastEvaluation":"2026-04-23T09:27:52.313591593Z","type":"recording"},{"name":"cluster:memory_usage_bytes:sum","query":"sum(node_memory_MemTotal_bytes{job=\"node-exporter\"} - node_memory_MemAvailable_bytes{job=\"node-exporter\"})","health":"ok","evaluationTime":0.000104306,"lastEvaluation":"2026-04-23T09:27:52.313949056Z","type":"recording"},{"name":"workload:cpu_usage_cores:sum","query":"sum(rate(container_cpu_usage_seconds_total{container=\"\",namespace!~\"openshift-.+\",pod!=\"\"}[5m]))","health":"ok","evaluationTime":0.000582559,"lastEvaluation":"2026-04-23T09:27:52.31405644Z","type":"recording"},{"name":"openshift:cpu_usage_cores:sum","query":"cluster:cpu_usage_cores:sum - workload:cpu_usage_cores:sum","health":"ok","evaluationTime":0.000076505,"lastEvaluation":"2026-04-23T09:27:52.314644732Z","type":"recording"},{"name":"workload:memory_usage_bytes:sum","query":"sum(container_memory_working_set_bytes{container=\"\",namespace!~\"openshift-.+\",pod!=\"\"})","health":"ok","evaluationTime":0.000482732,"lastEvaluation":"2026-04-23T09:27:52.314724248Z","type":"recording"},{"name":"openshift:memory_usage_bytes:sum","query":"cluster:memory_usage_bytes:sum - workload:memory_usage_bytes:sum","health":"ok","evaluationTime":0.000068678,"lastEvaluation":"2026-04-23T09:27:52.315211823Z","type":"recording"},{"name":"cluster:node_instance_type_count:sum","query":"sum by (label_beta_kubernetes_io_instance_type, label_node_role_kubernetes_io, label_kubernetes_io_arch, label_node_openshift_io_os_id) (cluster:master_nodes or on (node) kube_node_labels)","health":"ok","evaluationTime":0.00012077,"lastEvaluation":"2026-04-23T09:27:52.315283364Z","type":"recording"},{"name":"cluster:kube_persistentvolumeclaim_resource_requests_storage_bytes:provisioner:sum","query":"sum by (provisioner) (topk by (namespace, persistentvolumeclaim) (1, kube_persistentvolumeclaim_resource_requests_storage_bytes) * on (namespace, persistentvolumeclaim) group_right () topk by (namespace, persistentvolumeclaim) (1, kube_persistentvolumeclaim_info * on (storageclass) group_left (provisioner) topk by (storageclass) (1, max by (storageclass, provisioner) (kube_storageclass_info))))","health":"ok","evaluationTime":0.000134458,"lastEvaluation":"2026-04-23T09:27:52.315407363Z","type":"recording"},{"name":"workload:capacity_physical_cpu_cores:sum","query":"(sum(node_role_os_version_machine:cpu_capacity_cores:sum{label_node_role_kubernetes_io_infra=\"\",label_node_role_kubernetes_io_master=\"\"} or absent(__does_not_exist__) * 0)) + ((sum(node_role_os_version_machine:cpu_capacity_cores:sum{label_node_role_kubernetes_io_master=\"true\"} or absent(__does_not_exist__) * 0) * ((max(cluster_master_schedulable == 1) * 0 + 1) or (absent(cluster_master_schedulable == 1) * 0))))","health":"ok","evaluationTime":0.000226988,"lastEvaluation":"2026-04-23T09:27:52.315544389Z","type":"recording"},{"name":"cluster:usage:workload:capacity_physical_cpu_cores:min:5m","query":"min_over_time(workload:capacity_physical_cpu_cores:sum[5m:15s])","health":"ok","evaluationTime":0.000062234,"lastEvaluation":"2026-04-23T09:27:52.315774313Z","type":"recording"},{"name":"cluster:usage:workload:capacity_physical_cpu_cores:max:5m","query":"max_over_time(workload:capacity_physical_cpu_cores:sum[5m:15s])","health":"ok","evaluationTime":0.000054523,"lastEvaluation":"2026-04-23T09:27:52.3158394Z","type":"recording"},{"name":"cluster:kubelet_volume_stats_used_bytes:provisioner:sum","query":"sum by (provisioner) (topk by (namespace, persistentvolumeclaim) (1, kubelet_volume_stats_used_bytes) * on (namespace, persistentvolumeclaim) group_right () topk by (namespace, persistentvolumeclaim) (1, kube_persistentvolumeclaim_info * on (storageclass) group_left (provisioner) topk by (storageclass) (1, max by (storageclass, provisioner) (kube_storageclass_info))))","health":"ok","evaluationTime":0.00012489,"lastEvaluation":"2026-04-23T09:27:52.315896581Z","type":"recording"},{"name":"instance:etcd_object_counts:sum","query":"sum by (instance) (apiserver_storage_objects != -1)","health":"ok","evaluationTime":0.000614052,"lastEvaluation":"2026-04-23T09:27:52.316024017Z","type":"recording"},{"name":"cluster:usage:resources:sum","query":"topk(500, max by (resource) (apiserver_storage_objects != -1))","health":"ok","evaluationTime":0.001108049,"lastEvaluation":"2026-04-23T09:27:52.316642043Z","type":"recording"},{"name":"cluster:usage:pods:terminal:workload:sum","query":"count(count by (namespace, pod) (kube_pod_restart_policy{namespace!~\"openshift-.+\",type!=\"Always\"}))","health":"ok","evaluationTime":0.000091847,"lastEvaluation":"2026-04-23T09:27:52.317755631Z","type":"recording"},{"name":"cluster:usage:containers:sum","query":"sum(max by (instance) (kubelet_containers_per_pod_count_sum))","health":"ok","evaluationTime":0.000067671,"lastEvaluation":"2026-04-23T09:27:52.317850382Z","type":"recording"},{"name":"node_role_os_version_machine:cpu_capacity_cores:sum","query":"count by (label_kubernetes_io_arch, label_node_hyperthread_enabled, label_node_openshift_io_os_id, label_node_role_kubernetes_io_master, label_node_role_kubernetes_io_infra) (cluster:cpu_core_node_labels)","health":"ok","evaluationTime":0.000090241,"lastEvaluation":"2026-04-23T09:27:52.317920897Z","type":"recording"},{"name":"cluster:capacity_cpu_sockets_hyperthread_enabled:sum","query":"count by (label_beta_kubernetes_io_instance_type, label_node_hyperthread_enabled, label_node_role_kubernetes_io) (max by (node, package, label_beta_kubernetes_io_instance_type, label_node_hyperthread_enabled, label_node_role_kubernetes_io) (cluster:cpu_core_node_labels))","health":"ok","evaluationTime":0.000094016,"lastEvaluation":"2026-04-23T09:27:52.318013764Z","type":"recording"},{"name":"node_role_os_version_machine:cpu_capacity_sockets:sum","query":"count by (label_kubernetes_io_arch, label_node_hyperthread_enabled, label_node_openshift_io_os_id, label_node_role_kubernetes_io_master, label_node_role_kubernetes_io_infra) (max by (node, package, label_kubernetes_io_arch, label_node_hyperthread_enabled, label_node_openshift_io_os_id, label_node_role_kubernetes_io_master, label_node_role_kubernetes_io_infra) (cluster:cpu_core_node_labels))","health":"ok","evaluationTime":0.000093504,"lastEvaluation":"2026-04-23T09:27:52.318111271Z","type":"recording"},{"name":"cluster:alertmanager_integrations:max","query":"max(alertmanager_integrations{namespace=\"openshift-monitoring\"})","health":"ok","evaluationTime":0.000046207,"lastEvaluation":"2026-04-23T09:27:52.318207244Z","type":"recording"},{"name":"cluster:kube_persistentvolume_plugin_type_counts:sum","query":"sum by (plugin_name, volume_mode) (pv_collector_total_pv_count{volume_plugin!~\".*-e2e-.*\"})","health":"ok","evaluationTime":0.000052213,"lastEvaluation":"2026-04-23T09:27:52.318256013Z","type":"recording"},{"name":"cluster:control_plane:all_nodes_ready","query":"sum(min by (node) (kube_node_status_condition{condition=\"Ready\",status=\"true\"}) and max by (node) (kube_node_role{role=\"master\"})) == bool sum(kube_node_role{role=\"master\"})","health":"ok","evaluationTime":0.000125778,"lastEvaluation":"2026-04-23T09:27:52.318310421Z","type":"recording"},{"name":"profile:cluster_monitoring_operator_collection_profile:max","query":"max by (profile) (cluster_monitoring_operator_collection_profile == 1)","health":"ok","evaluationTime":0.000044404,"lastEvaluation":"2026-04-23T09:27:52.318438692Z","type":"recording"},{"state":"inactive","name":"ClusterMonitoringOperatorReconciliationErrors","query":"max_over_time(cluster_monitoring_operator_last_reconciliation_successful[5m]) == 0","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Errors are occurring during reconciliation cycles. Inspect the cluster-monitoring-operator log for potential root causes.","summary":"Cluster Monitoring Operator is experiencing unexpected reconciliation errors."},"alerts":[],"health":"ok","evaluationTime":0.000058181,"lastEvaluation":"2026-04-23T09:27:52.318485884Z","type":"alerting"},{"state":"inactive","name":"ClusterMonitoringOperatorDeprecatedConfig","query":"max by (configmap, field, deprecation_version) (cluster_monitoring_operator_deprecated_config_in_use) == 1","duration":3600,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"The configuration field {{ $labels.field }} in {{ $labels.configmap }} was deprecated in {{ $labels.deprecation_version }} and has no effect.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterMonitoringOperatorDeprecatedConfig.md","summary":"Cluster Monitoring Operator is being used with deprecated configuration."},"alerts":[],"health":"ok","evaluationTime":0.00005449,"lastEvaluation":"2026-04-23T09:27:52.318546398Z","type":"alerting"},{"state":"firing","name":"AlertmanagerReceiversNotConfigured","query":"cluster:alertmanager_integrations:max == 0","duration":600,"keepFiringFor":0,"labels":{"namespace":"openshift-monitoring","severity":"warning"},"annotations":{"description":"Alerts are not configured to be sent to a notification system, meaning that you may not be notified in a timely fashion when important failures occur. Check the OpenShift documentation to learn how to configure notifications with Alertmanager.","summary":"Receivers (notification integrations) are not configured on Alertmanager"},"alerts":[{"labels":{"alertname":"AlertmanagerReceiversNotConfigured","namespace":"openshift-monitoring","severity":"warning"},"annotations":{"description":"Alerts are not configured to be sent to a notification system, meaning that you may not be notified in a timely fashion when important failures occur. Check the OpenShift documentation to learn how to configure notifications with Alertmanager.","summary":"Receivers (notification integrations) are not configured on Alertmanager"},"state":"firing","activeAt":"2026-04-23T08:52:22.291205032Z","value":"0e+00"}],"health":"ok","evaluationTime":0.000197616,"lastEvaluation":"2026-04-23T09:27:52.318603594Z","type":"alerting"},{"state":"inactive","name":"KubeDeploymentReplicasMismatch","query":"(((kube_deployment_spec_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e kube_deployment_status_replicas_available{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) and (changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[5m]) == 0)) * on () group_left () cluster:control_plane:all_nodes_ready) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. This indicates that cluster infrastructure is unable to start or restart the necessary components. This most often occurs when one or more nodes are down or partioned from the cluster, or a fault occurs on the node that prevents the workload from starting. In rare cases this may indicate a new version of a cluster component cannot start due to a bug or configuration error. Assess the pods for this deployment to verify they are running on healthy nodes and then contact support.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeDeploymentReplicasMismatch.md","summary":"Deployment has not matched the expected number of replicas"},"alerts":[],"health":"ok","evaluationTime":0.000567803,"lastEvaluation":"2026-04-23T09:27:52.318805224Z","type":"alerting"},{"name":"cluster:usage:kube_schedulable_node_ready_reachable:avg5m","query":"avg_over_time((((count((max by (node) (up{job=\"kubelet\",metrics_path=\"/metrics\"} == 1) and max by (node) (kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 1) and min by (node) (kube_node_spec_unschedulable == 0))) / scalar(count(min by (node) (kube_node_spec_unschedulable == 0))))))[5m:1s])","health":"ok","evaluationTime":0.001856845,"lastEvaluation":"2026-04-23T09:27:52.319377878Z","type":"recording"},{"name":"cluster:usage:kube_node_ready:avg5m","query":"avg_over_time((count(max by (node) (kube_node_status_condition{condition=\"Ready\",status=\"true\"} == 1)) / scalar(count(max by (node) (kube_node_status_condition{condition=\"Ready\",status=\"true\"}))))[5m:1s])","health":"ok","evaluationTime":0.000680177,"lastEvaluation":"2026-04-23T09:27:52.321240247Z","type":"recording"},{"name":"kube_running_pod_ready","query":"(max without (condition, container, endpoint, instance, job, service) (((kube_pod_status_ready{condition=\"false\"} == 1) * 0 or (kube_pod_status_ready{condition=\"true\"} == 1)) * on (pod, namespace) group_left () group by (pod, namespace) (kube_pod_status_phase{phase=~\"Running|Unknown|Pending\"} == 1)))","health":"ok","evaluationTime":0.001741379,"lastEvaluation":"2026-04-23T09:27:52.321925018Z","type":"recording"},{"name":"cluster:usage:openshift:kube_running_pod_ready:avg","query":"avg(kube_running_pod_ready{namespace=~\"openshift-.*\"})","health":"ok","evaluationTime":0.00031206,"lastEvaluation":"2026-04-23T09:27:52.32367245Z","type":"recording"},{"name":"cluster:usage:workload:kube_running_pod_ready:avg","query":"avg(kube_running_pod_ready{namespace!~\"openshift-.*\"})","health":"ok","evaluationTime":0.000146743,"lastEvaluation":"2026-04-23T09:27:52.323988107Z","type":"recording"},{"state":"inactive","name":"KubePodNotScheduled","query":"last_over_time(kube_pod_status_unschedulable{namespace=~\"(openshift-.*|kube-.*|default)\"}[5m]) == 1","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Pod {{ $labels.namespace }}/{{ $labels.pod }} cannot be scheduled for more than 30 minutes.\nCheck the details of the pod with the following command:\noc describe -n {{ $labels.namespace }} pod {{ $labels.pod }}","summary":"Pod cannot be scheduled."},"alerts":[],"health":"ok","evaluationTime":0.000107011,"lastEvaluation":"2026-04-23T09:27:52.324137996Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.032215618,"lastEvaluation":"2026-04-23T09:27:52.292031794Z"},{"name":"openshift-monitoring.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"openshift:prometheus_tsdb_head_series:sum","query":"sum by (job, namespace) (max without (instance) (prometheus_tsdb_head_series{namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}))","health":"ok","evaluationTime":0.000268891,"lastEvaluation":"2026-04-23T09:27:51.626523665Z","type":"recording"},{"name":"openshift:prometheus_tsdb_head_samples_appended_total:sum","query":"sum by (job, namespace) (max without (instance) (rate(prometheus_tsdb_head_samples_appended_total{namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[2m])))","health":"ok","evaluationTime":0.000122254,"lastEvaluation":"2026-04-23T09:27:51.626797966Z","type":"recording"},{"name":"monitoring:container_memory_working_set_bytes:sum","query":"sum by (namespace) (max without (instance) (container_memory_working_set_bytes{container=\"\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}))","health":"ok","evaluationTime":0.000285959,"lastEvaluation":"2026-04-23T09:27:51.626923443Z","type":"recording"},{"name":"namespace_job:scrape_series_added:topk3_sum1h","query":"topk(3, sum by (namespace, job) (sum_over_time(scrape_series_added[1h])))","health":"ok","evaluationTime":0.000521363,"lastEvaluation":"2026-04-23T09:27:51.62721301Z","type":"recording"},{"name":"namespace_job:scrape_samples_post_metric_relabeling:topk3","query":"topk(3, max by (namespace, job) (topk by (namespace, job) (1, scrape_samples_post_metric_relabeling)))","health":"ok","evaluationTime":0.000305081,"lastEvaluation":"2026-04-23T09:27:51.627739525Z","type":"recording"},{"name":"monitoring:haproxy_server_http_responses_total:sum","query":"sum by (exported_service) (rate(haproxy_server_http_responses_total{exported_namespace=\"openshift-monitoring\",exported_service=~\"alertmanager-main|prometheus-k8s\"}[5m]))","health":"ok","evaluationTime":0.000163819,"lastEvaluation":"2026-04-23T09:27:51.628048215Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"ReplicationController\"}, \"replicationcontroller\", \"$1\", \"owner_name\", \"(.*)\") * on (replicationcontroller, namespace) group_left (owner_name) topk by (replicationcontroller, namespace) (1, max by (replicationcontroller, namespace, owner_name) (kube_replicationcontroller_owner{job=\"kube-state-metrics\"})), \"workload\", \"$1\", \"owner_name\", \"(.*)\"))","labels":{"workload_type":"deploymentconfig"},"health":"ok","evaluationTime":0.000213804,"lastEvaluation":"2026-04-23T09:27:51.628214988Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.001929561,"lastEvaluation":"2026-04-23T09:27:51.626502068Z"},{"name":"openshift-sre.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-cluster-monitoring-operator-prometheus-rules-891bd8ec-1302-4127-a569-eca49d63416b.yaml","rules":[{"name":"code:apiserver_request_total:rate:sum","query":"sum by (code) (rate(apiserver_request_total{job=\"apiserver\"}[10m]))","health":"ok","evaluationTime":0.000683743,"lastEvaluation":"2026-04-23T09:27:39.148190339Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000709395,"lastEvaluation":"2026-04-23T09:27:39.14816821Z"},{"name":"kube-state-metrics","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kube-state-metrics-rules-4f67f159-ff72-4ebe-871a-cadb4e248f74.yaml","rules":[{"state":"inactive","name":"KubeStateMetricsListErrors","query":"(sum by (cluster) (rate(kube_state_metrics_list_total{job=\"kube-state-metrics\",result=\"error\"}[5m])) / sum by (cluster) (rate(kube_state_metrics_list_total{job=\"kube-state-metrics\"}[5m]))) \u003e 0.01","duration":900,"keepFiringFor":0,"labels":{"namespace":"openshift-monitoring","severity":"warning"},"annotations":{"description":"kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.","summary":"kube-state-metrics is experiencing errors in list operations."},"alerts":[],"health":"ok","evaluationTime":0.000319548,"lastEvaluation":"2026-04-23T09:27:32.607559953Z","type":"alerting"},{"state":"inactive","name":"KubeStateMetricsWatchErrors","query":"(sum by (cluster) (rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\",result=\"error\"}[5m])) / sum by (cluster) (rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\"}[5m]))) \u003e 0.01","duration":900,"keepFiringFor":0,"labels":{"namespace":"openshift-monitoring","severity":"warning"},"annotations":{"description":"kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.","summary":"kube-state-metrics is experiencing errors in watch operations."},"alerts":[],"health":"ok","evaluationTime":0.000175929,"lastEvaluation":"2026-04-23T09:27:32.607884387Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000527299,"lastEvaluation":"2026-04-23T09:27:32.607535511Z"},{"name":"k8s.rules.container_cpu_limits","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits","query":"kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"cpu\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))","health":"ok","evaluationTime":0.000954021,"lastEvaluation":"2026-04-23T09:27:52.014955949Z","type":"recording"},{"name":"namespace_cpu:kube_pod_container_resource_limits:sum","query":"sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"cpu\"}) * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))","health":"ok","evaluationTime":0.00072236,"lastEvaluation":"2026-04-23T09:27:52.015916958Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.001715702,"lastEvaluation":"2026-04-23T09:27:52.01492604Z"},{"name":"k8s.rules.container_cpu_requests","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests","query":"kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))","health":"ok","evaluationTime":0.0016074,"lastEvaluation":"2026-04-23T09:27:32.609706151Z","type":"recording"},{"name":"namespace_cpu:kube_pod_container_resource_requests:sum","query":"sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\"}) * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))","health":"ok","evaluationTime":0.001539322,"lastEvaluation":"2026-04-23T09:27:32.611322666Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.003168944,"lastEvaluation":"2026-04-23T09:27:32.609695706Z"},{"name":"k8s.rules.container_cpu_usage_seconds_total","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m","query":"sum by (cluster, namespace, pod, container) (rate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"}[5m])) * on (cluster, namespace, pod) group_left (node) topk by (cluster, namespace, pod) (1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))","health":"ok","evaluationTime":0.002453852,"lastEvaluation":"2026-04-23T09:27:37.855661813Z","type":"recording"},{"name":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate","query":"sum by (cluster, namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"}[5m])) * on (cluster, namespace, pod) group_left (node) topk by (cluster, namespace, pod) (1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))","health":"ok","evaluationTime":0.00256642,"lastEvaluation":"2026-04-23T09:27:37.858129046Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.005059965,"lastEvaluation":"2026-04-23T09:27:37.855639053Z"},{"name":"k8s.rules.container_memory_cache","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_namespace_pod_container:container_memory_cache","query":"container_memory_cache{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on (cluster, namespace, pod) group_left (node) topk by (cluster, namespace, pod) (1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))","health":"ok","evaluationTime":0.002170032,"lastEvaluation":"2026-04-23T09:27:47.846688919Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.002199302,"lastEvaluation":"2026-04-23T09:27:47.846663499Z"},{"name":"k8s.rules.container_memory_limits","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"cluster:namespace:pod_memory:active:kube_pod_container_resource_limits","query":"kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"memory\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))","health":"ok","evaluationTime":0.001015659,"lastEvaluation":"2026-04-23T09:27:54.710683211Z","type":"recording"},{"name":"namespace_memory:kube_pod_container_resource_limits:sum","query":"sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"memory\"}) * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))","health":"ok","evaluationTime":0.00086366,"lastEvaluation":"2026-04-23T09:27:54.711710135Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.001957719,"lastEvaluation":"2026-04-23T09:27:54.710620414Z"},{"name":"k8s.rules.container_memory_requests","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"cluster:namespace:pod_memory:active:kube_pod_container_resource_requests","query":"kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))","health":"ok","evaluationTime":0.001864071,"lastEvaluation":"2026-04-23T09:27:41.895823902Z","type":"recording"},{"name":"namespace_memory:kube_pod_container_resource_requests:sum","query":"sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\"}) * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))","health":"ok","evaluationTime":0.001627362,"lastEvaluation":"2026-04-23T09:27:41.897698306Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.003528986,"lastEvaluation":"2026-04-23T09:27:41.89579972Z"},{"name":"k8s.rules.container_memory_rss","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_namespace_pod_container:container_memory_rss","query":"container_memory_rss{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on (cluster, namespace, pod) group_left (node) topk by (cluster, namespace, pod) (1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))","health":"ok","evaluationTime":0.002236587,"lastEvaluation":"2026-04-23T09:27:42.934949349Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.002261139,"lastEvaluation":"2026-04-23T09:27:42.934928341Z"},{"name":"k8s.rules.container_memory_swap","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_namespace_pod_container:container_memory_swap","query":"container_memory_swap{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on (cluster, namespace, pod) group_left (node) topk by (cluster, namespace, pod) (1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))","health":"ok","evaluationTime":0.002194456,"lastEvaluation":"2026-04-23T09:27:49.457838424Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.002224509,"lastEvaluation":"2026-04-23T09:27:49.457811845Z"},{"name":"k8s.rules.container_memory_working_set_bytes","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_namespace_pod_container:container_memory_working_set_bytes","query":"container_memory_working_set_bytes{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on (cluster, namespace, pod) group_left (node) topk by (cluster, namespace, pod) (1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))","health":"ok","evaluationTime":0.002132367,"lastEvaluation":"2026-04-23T09:27:46.269372997Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.002185866,"lastEvaluation":"2026-04-23T09:27:46.269323625Z"},{"name":"k8s.rules.pod_owner","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.*)\") * on (cluster, replicaset, namespace) group_left (owner_name) topk by (cluster, replicaset, namespace) (1, max by (cluster, replicaset, namespace, owner_name) (kube_replicaset_owner{job=\"kube-state-metrics\",owner_kind=\"\"})), \"workload\", \"$1\", \"replicaset\", \"(.*)\"))","labels":{"workload_type":"replicaset"},"health":"ok","evaluationTime":0.000508035,"lastEvaluation":"2026-04-23T09:27:35.687139848Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.*)\") * on (replicaset, namespace, cluster) group_left (owner_name) topk by (cluster, replicaset, namespace) (1, max by (cluster, replicaset, namespace, owner_name) (kube_replicaset_owner{job=\"kube-state-metrics\",owner_kind=\"Deployment\"})), \"workload\", \"$1\", \"owner_name\", \"(.*)\"))","labels":{"workload_type":"deployment"},"health":"ok","evaluationTime":0.000817452,"lastEvaluation":"2026-04-23T09:27:35.687654737Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"DaemonSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\"))","labels":{"workload_type":"daemonset"},"health":"ok","evaluationTime":0.000420276,"lastEvaluation":"2026-04-23T09:27:35.688480299Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"StatefulSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\"))","labels":{"workload_type":"statefulset"},"health":"ok","evaluationTime":0.000105749,"lastEvaluation":"2026-04-23T09:27:35.688904219Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"group by (cluster, namespace, workload, pod) (label_join(group by (cluster, namespace, job_name, pod, owner_name) (label_join(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"Job\"}, \"job_name\", \"\", \"owner_name\")) * on (cluster, namespace, job_name) group_left () group by (cluster, namespace, job_name) (kube_job_owner{job=\"kube-state-metrics\",owner_kind=~\"Pod|\"}), \"workload\", \"\", \"owner_name\"))","labels":{"workload_type":"job"},"health":"ok","evaluationTime":0.000186592,"lastEvaluation":"2026-04-23T09:27:35.689013269Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"\",owner_name=\"\"}, \"workload\", \"$1\", \"pod\", \"(.+)\"))","labels":{"workload_type":"barepod"},"health":"ok","evaluationTime":0.000113115,"lastEvaluation":"2026-04-23T09:27:35.689202654Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"max by (cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"Node\"}, \"workload\", \"$1\", \"pod\", \"(.+)\"))","labels":{"workload_type":"staticpod"},"health":"ok","evaluationTime":0.000188104,"lastEvaluation":"2026-04-23T09:27:35.689318083Z","type":"recording"},{"name":"namespace_workload_pod:kube_pod_owner:relabel","query":"group by (cluster, namespace, workload, workload_type, pod) (label_join(label_join(group by (cluster, namespace, job_name, pod) (label_join(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"Job\"}, \"job_name\", \"\", \"owner_name\")) * on (cluster, namespace, job_name) group_left (owner_kind, owner_name) group by (cluster, namespace, job_name, owner_kind, owner_name) (kube_job_owner{job=\"kube-state-metrics\",owner_kind!=\"\",owner_kind!=\"Pod\"}), \"workload\", \"\", \"owner_name\"), \"workload_type\", \"\", \"owner_kind\") or label_replace(label_replace(label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.+)\") * on (cluster, namespace, replicaset) group_left (owner_kind, owner_name) group by (cluster, namespace, replicaset, owner_kind, owner_name) (kube_replicaset_owner{job=\"kube-state-metrics\",owner_kind!=\"\",owner_kind!=\"Deployment\"}), \"workload\", \"$1\", \"owner_name\", \"(.+)\") or label_replace(group by (cluster, namespace, pod, owner_name, owner_kind) (kube_pod_owner{job=\"kube-state-metrics\",owner_kind!=\"\",owner_kind!=\"DaemonSet\",owner_kind!=\"Job\",owner_kind!=\"Node\",owner_kind!=\"ReplicaSet\",owner_kind!=\"StatefulSet\"}), \"workload\", \"$1\", \"owner_name\", \"(.+)\"), \"workload_type\", \"$1\", \"owner_kind\", \"(.+)\"))","health":"ok","evaluationTime":0.000658408,"lastEvaluation":"2026-04-23T09:27:35.689509416Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.003052117,"lastEvaluation":"2026-04-23T09:27:35.687118334Z"},{"name":"kube-scheduler.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum without (instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000234877,"lastEvaluation":"2026-04-23T09:27:58.022293388Z","type":"recording"},{"name":"cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum without (instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000071255,"lastEvaluation":"2026-04-23T09:27:58.022533452Z","type":"recording"},{"name":"cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum without (instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000065682,"lastEvaluation":"2026-04-23T09:27:58.02260868Z","type":"recording"},{"name":"cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile","query":"histogram_quantile(0.9, sum without (instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.9"},"health":"ok","evaluationTime":0.000053323,"lastEvaluation":"2026-04-23T09:27:58.022677621Z","type":"recording"},{"name":"cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile","query":"histogram_quantile(0.9, sum without (instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.9"},"health":"ok","evaluationTime":0.00005753,"lastEvaluation":"2026-04-23T09:27:58.022733883Z","type":"recording"},{"name":"cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile","query":"histogram_quantile(0.9, sum without (instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.9"},"health":"ok","evaluationTime":0.00005344,"lastEvaluation":"2026-04-23T09:27:58.022793922Z","type":"recording"},{"name":"cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile","query":"histogram_quantile(0.5, sum without (instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.5"},"health":"ok","evaluationTime":0.000054262,"lastEvaluation":"2026-04-23T09:27:58.022849752Z","type":"recording"},{"name":"cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile","query":"histogram_quantile(0.5, sum without (instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.5"},"health":"ok","evaluationTime":0.000061817,"lastEvaluation":"2026-04-23T09:27:58.022906315Z","type":"recording"},{"name":"cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile","query":"histogram_quantile(0.5, sum without (instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job=\"scheduler\"}[5m])))","labels":{"quantile":"0.5"},"health":"ok","evaluationTime":0.000081998,"lastEvaluation":"2026-04-23T09:27:58.022971424Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000789529,"lastEvaluation":"2026-04-23T09:27:58.022267013Z"},{"name":"kubelet.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile","query":"histogram_quantile(0.99, sum by (cluster, instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",metrics_path=\"/metrics\"}[5m])) * on (cluster, instance) group_left (node) max by (cluster, instance, node) (kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"}))","labels":{"quantile":"0.99"},"health":"ok","evaluationTime":0.000562448,"lastEvaluation":"2026-04-23T09:27:31.427397305Z","type":"recording"},{"name":"node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile","query":"histogram_quantile(0.9, sum by (cluster, instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",metrics_path=\"/metrics\"}[5m])) * on (cluster, instance) group_left (node) max by (cluster, instance, node) (kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"}))","labels":{"quantile":"0.9"},"health":"ok","evaluationTime":0.000426765,"lastEvaluation":"2026-04-23T09:27:31.427966048Z","type":"recording"},{"name":"node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile","query":"histogram_quantile(0.5, sum by (cluster, instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",metrics_path=\"/metrics\"}[5m])) * on (cluster, instance) group_left (node) max by (cluster, instance, node) (kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"}))","labels":{"quantile":"0.5"},"health":"ok","evaluationTime":0.000372649,"lastEvaluation":"2026-04-23T09:27:31.428398916Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.001399271,"lastEvaluation":"2026-04-23T09:27:31.427374307Z"},{"name":"kubernetes-apps","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"state":"inactive","name":"KubePodCrashLooping","query":"max_over_time(kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",reason=\"CrashLoopBackOff\"}[5m]) \u003e= 1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\").","summary":"Pod is crash looping."},"alerts":[],"health":"ok","evaluationTime":0.000254145,"lastEvaluation":"2026-04-23T09:27:35.875970392Z","type":"alerting"},{"state":"inactive","name":"KubePodNotReady","query":"sum by (namespace, pod, cluster) (max by (namespace, pod, cluster) (kube_pod_status_phase{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",phase=~\"Pending|Unknown\"} unless ignoring (phase) (kube_pod_status_unschedulable{job=\"kube-state-metrics\"} == 1)) * on (namespace, pod, cluster) group_left (owner_kind) topk by (namespace, pod, cluster) (1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"}))) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePodNotReady.md","summary":"Pod has been in a non-ready state for more than 15 minutes."},"alerts":[],"health":"ok","evaluationTime":0.001548009,"lastEvaluation":"2026-04-23T09:27:35.876229914Z","type":"alerting"},{"state":"inactive","name":"KubeDeploymentGenerationMismatch","query":"kube_deployment_status_observed_generation{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_deployment_metadata_generation{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.","summary":"Deployment generation mismatch due to possible roll-back"},"alerts":[],"health":"ok","evaluationTime":0.000347635,"lastEvaluation":"2026-04-23T09:27:35.877785288Z","type":"alerting"},{"state":"inactive","name":"KubeDeploymentRolloutStuck","query":"kube_deployment_status_condition{condition=\"Progressing\",job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",status=\"false\"} != 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes.","summary":"Deployment rollout is not progressing."},"alerts":[],"health":"ok","evaluationTime":0.000182825,"lastEvaluation":"2026-04-23T09:27:35.878137413Z","type":"alerting"},{"state":"inactive","name":"KubeStatefulSetReplicasMismatch","query":"(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_statefulset_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) and (changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[10m]) == 0)","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.","summary":"StatefulSet has not matched the expected number of replicas."},"alerts":[],"health":"ok","evaluationTime":0.000292218,"lastEvaluation":"2026-04-23T09:27:35.878324308Z","type":"alerting"},{"state":"inactive","name":"KubeStatefulSetGenerationMismatch","query":"kube_statefulset_status_observed_generation{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_statefulset_metadata_generation{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.","summary":"StatefulSet generation mismatch due to possible roll-back"},"alerts":[],"health":"ok","evaluationTime":0.000149657,"lastEvaluation":"2026-04-23T09:27:35.878620089Z","type":"alerting"},{"state":"inactive","name":"KubeStatefulSetUpdateNotRolledOut","query":"(max by (namespace, statefulset, job, cluster) (kube_statefulset_status_current_revision{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} unless kube_statefulset_status_update_revision{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) * on (namespace, statefulset, job, cluster) (kube_statefulset_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"})) and on (namespace, statefulset, job, cluster) (changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[5m]) == 0)","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.","summary":"StatefulSet update has not been rolled out."},"alerts":[],"health":"ok","evaluationTime":0.000392613,"lastEvaluation":"2026-04-23T09:27:35.878772406Z","type":"alerting"},{"state":"inactive","name":"KubeDaemonSetRolloutStuck","query":"((kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) or (kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != 0) or (kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) or (kube_daemonset_status_number_available{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"})) and (changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[5m]) == 0)","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 30 minutes.","summary":"DaemonSet rollout is stuck."},"alerts":[],"health":"ok","evaluationTime":0.000858017,"lastEvaluation":"2026-04-23T09:27:35.879168774Z","type":"alerting"},{"state":"inactive","name":"KubeContainerWaiting","query":"kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",reason!=\"CrashLoopBackOff\"} \u003e 0","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\").","summary":"Pod container waiting longer than 1 hour"},"alerts":[],"health":"ok","evaluationTime":0.000128588,"lastEvaluation":"2026-04-23T09:27:35.880031475Z","type":"alerting"},{"state":"inactive","name":"KubeDaemonSetNotScheduled","query":"kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} - kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.","summary":"DaemonSet pods are not scheduled."},"alerts":[],"health":"ok","evaluationTime":0.000279341,"lastEvaluation":"2026-04-23T09:27:35.880163213Z","type":"alerting"},{"state":"inactive","name":"KubeDaemonSetMisScheduled","query":"kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.","summary":"DaemonSet pods are misscheduled."},"alerts":[],"health":"ok","evaluationTime":0.000119459,"lastEvaluation":"2026-04-23T09:27:35.880446042Z","type":"alerting"},{"state":"inactive","name":"KubeJobNotCompleted","query":"time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} and kube_job_status_active{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0) \u003e 43200","duration":0,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete.","summary":"Job did not complete in time"},"alerts":[],"health":"ok","evaluationTime":0.000220339,"lastEvaluation":"2026-04-23T09:27:35.88056844Z","type":"alerting"},{"state":"inactive","name":"KubeJobFailed","query":"kube_job_failed{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeJobFailed.md","summary":"Job failed to complete."},"alerts":[],"health":"ok","evaluationTime":0.00007954,"lastEvaluation":"2026-04-23T09:27:35.880791728Z","type":"alerting"},{"state":"inactive","name":"KubeHpaReplicasMismatch","query":"(kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} != kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) and (kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003e kube_horizontalpodautoscaler_spec_min_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) and (kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} \u003c kube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) and changes(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}[15m]) == 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }} has not matched the desired number of replicas for longer than 15 minutes.","summary":"HPA has not matched desired number of replicas."},"alerts":[],"health":"ok","evaluationTime":0.000378169,"lastEvaluation":"2026-04-23T09:27:35.880873863Z","type":"alerting"},{"state":"inactive","name":"KubeHpaMaxedOut","query":"kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} == kube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }} has been running at max replicas for longer than 15 minutes.","summary":"HPA is running at max replicas"},"alerts":[],"health":"ok","evaluationTime":0.000126922,"lastEvaluation":"2026-04-23T09:27:35.881255523Z","type":"alerting"},{"state":"inactive","name":"KubePdbNotEnoughHealthyPods","query":"(kube_poddisruptionbudget_status_desired_healthy{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"} - kube_poddisruptionbudget_status_current_healthy{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"PDB {{ $labels.namespace }}/{{ $labels.poddisruptionbudget }} expects {{ $value }} more healthy pods. The desired number of healthy pods has not been met for at least 15m.","summary":"PDB does not have enough healthy pods."},"alerts":[],"health":"ok","evaluationTime":0.000192366,"lastEvaluation":"2026-04-23T09:27:35.881386289Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.005632041,"lastEvaluation":"2026-04-23T09:27:35.87594916Z"},{"name":"kubernetes-resources","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"state":"inactive","name":"KubeCPUOvercommit","query":"(sum(namespace_cpu:kube_pod_container_resource_requests:sum) - sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\"}) \u003e 0 and count(max by (node) (kube_node_role{job=\"kube-state-metrics\",role=\"control-plane\"})) \u003c 3) or (sum(namespace_cpu:kube_pod_container_resource_requests:sum) - (sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\"}) - max(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\"})) \u003e 0 and (sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\"}) - max(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\"})) \u003e 0)","duration":600,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"warning"},"annotations":{"description":"Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.","summary":"Cluster has overcommitted CPU resource requests."},"alerts":[],"health":"ok","evaluationTime":0.000671766,"lastEvaluation":"2026-04-23T09:27:43.721914665Z","type":"alerting"},{"state":"inactive","name":"KubeMemoryOvercommit","query":"(sum(namespace_memory:kube_pod_container_resource_requests:sum) - sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\"}) \u003e 0 and count(max by (node) (kube_node_role{job=\"kube-state-metrics\",role=\"control-plane\"})) \u003c 3) or (sum(namespace_memory:kube_pod_container_resource_requests:sum) - (sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\"}) - max(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\"})) \u003e 0 and (sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\"}) - max(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\"})) \u003e 0)","duration":600,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"warning"},"annotations":{"description":"Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.","summary":"Cluster has overcommitted memory resource requests."},"alerts":[],"health":"ok","evaluationTime":0.000502685,"lastEvaluation":"2026-04-23T09:27:43.722593729Z","type":"alerting"},{"state":"inactive","name":"KubeQuotaAlmostFull","query":"kube_resourcequota{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",type=\"used\"} / ignoring (instance, job, type) (kube_resourcequota{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",type=\"hard\"} \u003e 0) \u003e 0.9 \u003c 1","duration":900,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.","summary":"Namespace quota is going to be full."},"alerts":[],"health":"ok","evaluationTime":0.000370212,"lastEvaluation":"2026-04-23T09:27:43.723100988Z","type":"alerting"},{"state":"inactive","name":"KubeQuotaFullyUsed","query":"kube_resourcequota{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",type=\"used\"} / ignoring (instance, job, type) (kube_resourcequota{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",type=\"hard\"} \u003e 0) == 1","duration":900,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.","summary":"Namespace quota is fully used."},"alerts":[],"health":"ok","evaluationTime":0.000235819,"lastEvaluation":"2026-04-23T09:27:43.723475326Z","type":"alerting"},{"state":"inactive","name":"KubeQuotaExceeded","query":"kube_resourcequota{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",type=\"used\"} / ignoring (instance, job, type) (kube_resourcequota{job=\"kube-state-metrics\",namespace=~\"(openshift-.*|kube-.*|default)\",type=\"hard\"} \u003e 0) \u003e 1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.","summary":"Namespace quota has exceeded the limits."},"alerts":[],"health":"ok","evaluationTime":0.000206941,"lastEvaluation":"2026-04-23T09:27:43.72371494Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.002032447,"lastEvaluation":"2026-04-23T09:27:43.721891772Z"},{"name":"kubernetes-system","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"state":"inactive","name":"KubeClientErrors","query":"(sum by (cluster, instance, job, namespace) (rate(rest_client_requests_total{code=~\"5..\",job=\"apiserver\"}[5m])) / sum by (cluster, instance, job, namespace) (rate(rest_client_requests_total{job=\"apiserver\"}[5m]))) \u003e 0.01","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.","summary":"Kubernetes API server client is experiencing errors."},"alerts":[],"health":"ok","evaluationTime":0.00059322,"lastEvaluation":"2026-04-23T09:27:58.685439703Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000638879,"lastEvaluation":"2026-04-23T09:27:58.68540092Z"},{"name":"kubernetes-system-kubelet","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"state":"inactive","name":"KubeNodeNotReady","query":"kube_node_status_condition{condition=\"Ready\",job=\"kube-state-metrics\",status=\"true\"} == 0 and on (cluster, node) kube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $labels.node }} has been unready for more than 15 minutes.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeNodeNotReady.md","summary":"Node is not ready."},"alerts":[],"health":"ok","evaluationTime":0.000250378,"lastEvaluation":"2026-04-23T09:27:43.951078793Z","type":"alerting"},{"state":"pending","name":"KubeNodePressure","query":"kube_node_status_condition{condition=~\"(MemoryPressure|DiskPressure|PIDPressure)\",job=\"kube-state-metrics\",status=\"true\"} == 1 and on (cluster, node) kube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0","duration":600,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"{{ $labels.node }} has active Condition {{ $labels.condition }}. This is caused by resource usage exceeding eviction thresholds.","summary":"Node has as active Condition."},"alerts":[{"labels":{"alertname":"KubeNodePressure","condition":"DiskPressure","container":"kube-rbac-proxy-main","endpoint":"https-main","job":"kube-state-metrics","namespace":"openshift-monitoring","node":"ip-10-0-131-47.ec2.internal","service":"kube-state-metrics","severity":"info","status":"true"},"annotations":{"description":"ip-10-0-131-47.ec2.internal has active Condition DiskPressure. This is caused by resource usage exceeding eviction thresholds.","summary":"Node has as active Condition."},"state":"pending","activeAt":"2026-04-23T09:24:43.950436918Z","value":"1e+00"}],"health":"ok","evaluationTime":0.000425333,"lastEvaluation":"2026-04-23T09:27:43.951335957Z","type":"alerting"},{"state":"inactive","name":"KubeNodeUnreachable","query":"(kube_node_spec_taint{effect=\"NoSchedule\",job=\"kube-state-metrics\",key=\"node.kubernetes.io/unreachable\"} unless ignoring (key, value) kube_node_spec_taint{job=\"kube-state-metrics\",key=~\"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn\"}) == 1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $labels.node }} is unreachable and some workloads may be rescheduled.","summary":"Node is unreachable."},"alerts":[],"health":"ok","evaluationTime":0.000218902,"lastEvaluation":"2026-04-23T09:27:43.95176767Z","type":"alerting"},{"state":"inactive","name":"KubeletTooManyPods","query":"(max by (cluster, instance) (kubelet_running_pods{job=\"kubelet\",metrics_path=\"/metrics\"} \u003e 1) * on (cluster, instance) group_left (node) max by (cluster, instance, node) (kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"})) / on (cluster, node) group_left () max by (cluster, node) (kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1) \u003e 0.95","duration":900,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"info"},"annotations":{"description":"Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.","summary":"Kubelet is running at capacity."},"alerts":[],"health":"ok","evaluationTime":0.000257645,"lastEvaluation":"2026-04-23T09:27:43.951990738Z","type":"alerting"},{"state":"inactive","name":"KubeNodeReadinessFlapping","query":"sum by (cluster, node) (changes(kube_node_status_condition{condition=\"Ready\",job=\"kube-state-metrics\",status=\"true\"}[15m])) \u003e 2 and on (cluster, node) kube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0","duration":900,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"warning"},"annotations":{"description":"The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.","summary":"Node readiness status is flapping."},"alerts":[],"health":"ok","evaluationTime":0.000167065,"lastEvaluation":"2026-04-23T09:27:43.952252617Z","type":"alerting"},{"state":"inactive","name":"KubeNodeEviction","query":"sum by (cluster, eviction_signal, instance) (rate(kubelet_evictions{job=\"kubelet\",metrics_path=\"/metrics\"}[15m])) * on (cluster, instance) group_left (node) max by (cluster, instance, node) (kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"}) \u003e 0","duration":0,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"Node {{ $labels.node }} is evicting Pods due to {{ $labels.eviction_signal }}.  Eviction occurs when eviction thresholds are crossed, typically caused by Pods exceeding RAM/ephemeral-storage limits.","summary":"Node is evicting pods."},"alerts":[],"health":"ok","evaluationTime":0.000126256,"lastEvaluation":"2026-04-23T09:27:43.952423144Z","type":"alerting"},{"state":"inactive","name":"KubeletPlegDurationHigh","query":"node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile=\"0.99\"} \u003e= 10","duration":300,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"warning"},"annotations":{"description":"The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.","summary":"Kubelet Pod Lifecycle Event Generator is taking too long to relist."},"alerts":[],"health":"ok","evaluationTime":0.000058001,"lastEvaluation":"2026-04-23T09:27:43.952552767Z","type":"alerting"},{"state":"inactive","name":"KubeletPodStartUpLatencyHigh","query":"histogram_quantile(0.99, sum by (cluster, instance, le) (rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\",metrics_path=\"/metrics\"}[5m]))) * on (cluster, instance) group_left (node) kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"} \u003e 60","duration":900,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"warning"},"annotations":{"description":"Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.","summary":"Kubelet Pod startup latency is too high."},"alerts":[],"health":"ok","evaluationTime":0.000679858,"lastEvaluation":"2026-04-23T09:27:43.952613794Z","type":"alerting"},{"state":"inactive","name":"KubeletClientCertificateRenewalErrors","query":"increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).","summary":"Kubelet has failed to renew its client certificate."},"alerts":[],"health":"ok","evaluationTime":0.000104808,"lastEvaluation":"2026-04-23T09:27:43.953297779Z","type":"alerting"},{"state":"inactive","name":"KubeletServerCertificateRenewalErrors","query":"increase(kubelet_server_expiration_renew_errors[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).","summary":"Kubelet has failed to renew its server certificate."},"alerts":[],"health":"ok","evaluationTime":0.000068,"lastEvaluation":"2026-04-23T09:27:43.953405742Z","type":"alerting"},{"state":"inactive","name":"KubeletDown","query":"absent(up{job=\"kubelet\",metrics_path=\"/metrics\"} == 1)","duration":900,"keepFiringFor":0,"labels":{"namespace":"kube-system","severity":"critical"},"annotations":{"description":"Kubelet has disappeared from Prometheus target discovery.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeletDown.md","summary":"Target disappeared from Prometheus target discovery."},"alerts":[],"health":"ok","evaluationTime":0.000072647,"lastEvaluation":"2026-04-23T09:27:43.9534766Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.002498442,"lastEvaluation":"2026-04-23T09:27:43.951052941Z"},{"name":"node.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-7b43a1eb-da93-4dd3-9550-1b8dfa2e8b07.yaml","rules":[{"name":"node_namespace_pod:kube_pod_info:","query":"topk by (cluster, namespace, pod) (1, max by (cluster, node, namespace, pod) (label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")))","health":"ok","evaluationTime":0.001051151,"lastEvaluation":"2026-04-23T09:27:52.037110725Z","type":"recording"},{"name":":node_memory_MemAvailable_bytes:sum","query":"sum by (cluster) (node_memory_MemAvailable_bytes{job=\"node-exporter\"} or (node_memory_Buffers_bytes{job=\"node-exporter\"} + node_memory_Cached_bytes{job=\"node-exporter\"} + node_memory_MemFree_bytes{job=\"node-exporter\"} + node_memory_Slab_bytes{job=\"node-exporter\"}))","health":"ok","evaluationTime":0.000297506,"lastEvaluation":"2026-04-23T09:27:52.038170648Z","type":"recording"},{"name":"node:node_cpu_utilization:ratio_rate5m","query":"avg by (cluster, node) (sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\",mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])))","health":"ok","evaluationTime":0.000737607,"lastEvaluation":"2026-04-23T09:27:52.038472548Z","type":"recording"},{"name":"cluster:node_cpu:ratio_rate5m","query":"avg by (cluster) (node:node_cpu_utilization:ratio_rate5m)","health":"ok","evaluationTime":0.000057387,"lastEvaluation":"2026-04-23T09:27:52.039215052Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.002183016,"lastEvaluation":"2026-04-23T09:27:52.0370916Z"},{"name":"node-exporter","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-node-exporter-rules-e8e19de9-d900-4001-99b1-8dd7a95c292d.yaml","rules":[{"state":"inactive","name":"NodeFilesystemSpaceFillingUp","query":"(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 15 and predict_linear(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"}[6h], 24 * 60 * 60) \u003c 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available space left and is filling up.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemSpaceFillingUp.md","summary":"Filesystem is predicted to run out of space within the next 24 hours."},"alerts":[],"health":"ok","evaluationTime":0.001543035,"lastEvaluation":"2026-04-23T09:27:51.889875935Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemSpaceFillingUp","query":"(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 10 and predict_linear(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"}[6h], 4 * 60 * 60) \u003c 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":3600,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available space left and is filling up fast.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemSpaceFillingUp.md","summary":"Filesystem is predicted to run out of space within the next 4 hours."},"alerts":[],"health":"ok","evaluationTime":0.001258461,"lastEvaluation":"2026-04-23T09:27:51.891426912Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemAlmostOutOfSpace","query":"(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 5 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available space left.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemAlmostOutOfSpace.md","summary":"Filesystem has less than 5% space left."},"alerts":[],"health":"ok","evaluationTime":0.000938923,"lastEvaluation":"2026-04-23T09:27:51.892690788Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemAlmostOutOfSpace","query":"(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 3 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":1800,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available space left.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemAlmostOutOfSpace.md","summary":"Filesystem has less than 3% space left."},"alerts":[],"health":"ok","evaluationTime":0.000879313,"lastEvaluation":"2026-04-23T09:27:51.893634806Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemFilesFillingUp","query":"(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 40 and predict_linear(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"}[6h], 24 * 60 * 60) \u003c 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available inodes left and is filling up.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemFilesFillingUp.md","summary":"Filesystem is predicted to run out of inodes within the next 24 hours."},"alerts":[],"health":"ok","evaluationTime":0.001250086,"lastEvaluation":"2026-04-23T09:27:51.894518243Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemFilesFillingUp","query":"(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 20 and predict_linear(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"}[6h], 4 * 60 * 60) \u003c 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":3600,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available inodes left and is filling up fast.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemFilesFillingUp.md","summary":"Filesystem is predicted to run out of inodes within the next 4 hours."},"alerts":[],"health":"ok","evaluationTime":0.001216133,"lastEvaluation":"2026-04-23T09:27:51.895773237Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemAlmostOutOfFiles","query":"(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 5 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available inodes left.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemAlmostOutOfFiles.md","summary":"Filesystem has less than 5% inodes left."},"alerts":[],"health":"ok","evaluationTime":0.000867741,"lastEvaluation":"2026-04-23T09:27:51.896993933Z","type":"alerting"},{"state":"inactive","name":"NodeFilesystemAlmostOutOfFiles","query":"(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} * 100 \u003c 3 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\",mountpoint!~\"/var/lib/ibmc-s3fs.*\"} == 0)","duration":3600,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available inodes left.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemAlmostOutOfFiles.md","summary":"Filesystem has less than 3% inodes left."},"alerts":[],"health":"ok","evaluationTime":0.000939083,"lastEvaluation":"2026-04-23T09:27:51.897865812Z","type":"alerting"},{"state":"inactive","name":"NodeNetworkReceiveErrs","query":"rate(node_network_receive_errs_total{job=\"node-exporter\"}[2m]) / rate(node_network_receive_packets_total{job=\"node-exporter\"}[2m]) \u003e 0.01","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.","summary":"Network interface is reporting many receive errors."},"alerts":[],"health":"ok","evaluationTime":0.000249963,"lastEvaluation":"2026-04-23T09:27:51.898810352Z","type":"alerting"},{"state":"inactive","name":"NodeNetworkTransmitErrs","query":"rate(node_network_transmit_errs_total{job=\"node-exporter\"}[2m]) / rate(node_network_transmit_packets_total{job=\"node-exporter\"}[2m]) \u003e 0.01","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.","summary":"Network interface is reporting many transmit errors."},"alerts":[],"health":"ok","evaluationTime":0.000208297,"lastEvaluation":"2026-04-23T09:27:51.899063744Z","type":"alerting"},{"state":"inactive","name":"NodeHighNumberConntrackEntriesUsed","query":"(node_nf_conntrack_entries{job=\"node-exporter\"} / node_nf_conntrack_entries_limit) \u003e 0.75","duration":0,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value | humanizePercentage }} of conntrack entries are used.","summary":"Number of conntrack are getting close to the limit."},"alerts":[],"health":"ok","evaluationTime":0.000141761,"lastEvaluation":"2026-04-23T09:27:51.899274984Z","type":"alerting"},{"state":"inactive","name":"NodeTextFileCollectorScrapeError","query":"node_textfile_scrape_error{job=\"node-exporter\"} == 1","duration":0,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Node Exporter text file collector on {{ $labels.instance }} failed to scrape.","summary":"Node Exporter text file collector failed to scrape."},"alerts":[],"health":"ok","evaluationTime":0.000056387,"lastEvaluation":"2026-04-23T09:27:51.899419908Z","type":"alerting"},{"state":"inactive","name":"NodeClockSkewDetected","query":"((node_timex_offset_seconds{job=\"node-exporter\"} \u003e 0.05 and deriv(node_timex_offset_seconds{job=\"node-exporter\"}[5m]) \u003e= 0) or (node_timex_offset_seconds{job=\"node-exporter\"} \u003c -0.05 and deriv(node_timex_offset_seconds{job=\"node-exporter\"}[5m]) \u003c= 0)) and on () absent(up{job=\"ptp-monitor-service\"})","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.","summary":"Clock skew detected."},"alerts":[],"health":"ok","evaluationTime":0.000241782,"lastEvaluation":"2026-04-23T09:27:51.899478732Z","type":"alerting"},{"state":"inactive","name":"NodeClockNotSynchronising","query":"(min_over_time(node_timex_sync_status{job=\"node-exporter\"}[5m]) == 0 and node_timex_maxerror_seconds{job=\"node-exporter\"} \u003e= 16) and on () absent(up{job=\"ptp-monitor-service\"})","duration":600,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"Clock at {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md","summary":"Clock not synchronising."},"alerts":[],"health":"ok","evaluationTime":0.00014735,"lastEvaluation":"2026-04-23T09:27:51.899723719Z","type":"alerting"},{"state":"inactive","name":"NodeRAIDDegraded","query":"node_md_disks_required{device=~\"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\"} - ignoring (state) (node_md_disks{device=~\"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\",state=\"active\"}) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeRAIDDegraded.md","summary":"RAID Array is degraded."},"alerts":[],"health":"ok","evaluationTime":0.000182635,"lastEvaluation":"2026-04-23T09:27:51.89987398Z","type":"alerting"},{"state":"inactive","name":"NodeRAIDDiskFailure","query":"node_md_disks{device=~\"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\",state=\"failed\"} \u003e 0","duration":0,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"At least one device in RAID array at {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.","summary":"Failed device in RAID array."},"alerts":[],"health":"ok","evaluationTime":0.000091642,"lastEvaluation":"2026-04-23T09:27:51.900059658Z","type":"alerting"},{"state":"inactive","name":"NodeFileDescriptorLimit","query":"(node_filefd_allocated{job=\"node-exporter\"} * 100 / node_filefd_maximum{job=\"node-exporter\"} \u003e 70)","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"File descriptors limit at {{ $labels.instance }} is currently at {{ printf \"%.2f\" $value }}%.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFileDescriptorLimit.md","summary":"Kernel is predicted to exhaust file descriptors limit soon."},"alerts":[],"health":"ok","evaluationTime":0.00011375,"lastEvaluation":"2026-04-23T09:27:51.900153939Z","type":"alerting"},{"state":"inactive","name":"NodeFileDescriptorLimit","query":"(node_filefd_allocated{job=\"node-exporter\"} * 100 / node_filefd_maximum{job=\"node-exporter\"} \u003e 90)","duration":900,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"File descriptors limit at {{ $labels.instance }} is currently at {{ printf \"%.2f\" $value }}%.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFileDescriptorLimit.md","summary":"Kernel is predicted to exhaust file descriptors limit soon."},"alerts":[],"health":"ok","evaluationTime":0.000116654,"lastEvaluation":"2026-04-23T09:27:51.900270328Z","type":"alerting"},{"state":"inactive","name":"NodeSystemSaturation","query":"node_load1{job=\"node-exporter\"} / count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}) \u003e 2","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf \"%.2f\" $value }}.\nThis might indicate this instance resources saturation and can cause it becoming unresponsive.\n","summary":"System saturated, load per core is very high."},"alerts":[],"health":"ok","evaluationTime":0.000269113,"lastEvaluation":"2026-04-23T09:27:51.900389644Z","type":"alerting"},{"state":"inactive","name":"NodeMemoryMajorPagesFaults","query":"rate(node_vmstat_pgmajfault{job=\"node-exporter\"}[5m]) \u003e 500","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf \"%.2f\" $value }}.\nPlease check that there is enough memory available at this instance.\n","summary":"Memory major page faults are occurring at very high rate."},"alerts":[],"health":"ok","evaluationTime":0.000084707,"lastEvaluation":"2026-04-23T09:27:51.900661589Z","type":"alerting"},{"state":"inactive","name":"NodeSystemdServiceFailed","query":"node_systemd_unit_state{job=\"node-exporter\",state=\"failed\"} == 1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }}","summary":"Systemd service has entered failed state."},"alerts":[],"health":"ok","evaluationTime":0.000045922,"lastEvaluation":"2026-04-23T09:27:51.900749655Z","type":"alerting"},{"state":"inactive","name":"NodeBondingDegraded","query":"(node_bonding_slaves - node_bonding_active) != 0","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Bonding interface {{ $labels.master }} on {{ $labels.instance }} is in degraded state due to one or more slave failures.","summary":"Bonding interface is degraded"},"alerts":[],"health":"ok","evaluationTime":0.000052757,"lastEvaluation":"2026-04-23T09:27:51.900797963Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.010999753,"lastEvaluation":"2026-04-23T09:27:51.889853273Z"},{"name":"node-exporter.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-node-exporter-rules-e8e19de9-d900-4001-99b1-8dd7a95c292d.yaml","rules":[{"name":"instance:node_num_cpu:sum","query":"count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"})","health":"ok","evaluationTime":0.000544582,"lastEvaluation":"2026-04-23T09:28:00.119440356Z","type":"recording"},{"name":"instance:node_cpu_utilisation:rate1m","query":"1 - avg without (cpu) (sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\",mode=~\"idle|iowait|steal\"}[1m])))","health":"ok","evaluationTime":0.00087821,"lastEvaluation":"2026-04-23T09:28:00.11999466Z","type":"recording"},{"name":"instance:node_load1_per_cpu:ratio","query":"(node_load1{job=\"node-exporter\"} / instance:node_num_cpu:sum{job=\"node-exporter\"})","health":"ok","evaluationTime":0.000212534,"lastEvaluation":"2026-04-23T09:28:00.120881835Z","type":"recording"},{"name":"instance:node_memory_utilisation:ratio","query":"1 - ((node_memory_MemAvailable_bytes{job=\"node-exporter\"} or (node_memory_Buffers_bytes{job=\"node-exporter\"} + node_memory_Cached_bytes{job=\"node-exporter\"} + node_memory_MemFree_bytes{job=\"node-exporter\"} + node_memory_Slab_bytes{job=\"node-exporter\"})) / node_memory_MemTotal_bytes{job=\"node-exporter\"})","health":"ok","evaluationTime":0.000476011,"lastEvaluation":"2026-04-23T09:28:00.121100682Z","type":"recording"},{"name":"instance:node_vmstat_pgmajfault:rate1m","query":"rate(node_vmstat_pgmajfault{job=\"node-exporter\"}[1m])","health":"ok","evaluationTime":0.000135548,"lastEvaluation":"2026-04-23T09:28:00.121583603Z","type":"recording"},{"name":"instance_device:node_disk_io_time_seconds:rate1m","query":"rate(node_disk_io_time_seconds_total{device=~\"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\"}[1m])","health":"ok","evaluationTime":0.000234405,"lastEvaluation":"2026-04-23T09:28:00.12172776Z","type":"recording"},{"name":"instance_device:node_disk_io_time_weighted_seconds:rate1m","query":"rate(node_disk_io_time_weighted_seconds_total{device=~\"mmcblk.p.+|nvme.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\"}[1m])","health":"ok","evaluationTime":0.000189853,"lastEvaluation":"2026-04-23T09:28:00.121967191Z","type":"recording"},{"name":"instance:node_network_receive_bytes_excluding_lo:rate1m","query":"sum without (device) (rate(node_network_receive_bytes_total{device!=\"lo\",job=\"node-exporter\"}[1m]))","health":"ok","evaluationTime":0.000239145,"lastEvaluation":"2026-04-23T09:28:00.122161784Z","type":"recording"},{"name":"instance:node_network_transmit_bytes_excluding_lo:rate1m","query":"sum without (device) (rate(node_network_transmit_bytes_total{device!=\"lo\",job=\"node-exporter\"}[1m]))","health":"ok","evaluationTime":0.000206281,"lastEvaluation":"2026-04-23T09:28:00.122406706Z","type":"recording"},{"name":"instance:node_network_receive_drop_excluding_lo:rate1m","query":"sum without (device) (rate(node_network_receive_drop_total{device!=\"lo\",job=\"node-exporter\"}[1m]))","health":"ok","evaluationTime":0.000165665,"lastEvaluation":"2026-04-23T09:28:00.122617578Z","type":"recording"},{"name":"instance:node_network_transmit_drop_excluding_lo:rate1m","query":"sum without (device) (rate(node_network_transmit_drop_total{device!=\"lo\",job=\"node-exporter\"}[1m]))","health":"ok","evaluationTime":0.000156978,"lastEvaluation":"2026-04-23T09:28:00.122788069Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.003542435,"lastEvaluation":"2026-04-23T09:28:00.119405328Z"},{"name":"telemetry","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-node-exporter-rules-e8e19de9-d900-4001-99b1-8dd7a95c292d.yaml","rules":[{"name":"vendor_model:node_accelerator_cards:sum","query":"sum by (vendor, model) (node_accelerator_card_info)","health":"ok","evaluationTime":0.00014594,"lastEvaluation":"2026-04-23T09:27:54.711728436Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000159284,"lastEvaluation":"2026-04-23T09:27:54.711717926Z"},{"name":"prometheus","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-prometheus-k8s-prometheus-rules-bdb0550d-3a0a-4985-9837-a74cdfb393db.yaml","rules":[{"state":"inactive","name":"PrometheusBadConfig","query":"max_over_time(prometheus_config_last_reload_successful{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) == 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.","summary":"Failed Prometheus configuration reload."},"alerts":[],"health":"ok","evaluationTime":0.000214847,"lastEvaluation":"2026-04-23T09:27:35.149299015Z","type":"alerting"},{"state":"inactive","name":"PrometheusSDRefreshFailure","query":"increase(prometheus_sd_refresh_failures_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[10m]) \u003e 0","duration":1200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to refresh SD with mechanism {{$labels.mechanism}}.","summary":"Failed Prometheus SD refresh."},"alerts":[],"health":"ok","evaluationTime":0.000062814,"lastEvaluation":"2026-04-23T09:27:35.149518482Z","type":"alerting"},{"state":"inactive","name":"PrometheusKubernetesListWatchFailures","query":"increase(prometheus_sd_kubernetes_failures_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Kubernetes service discovery of Prometheus {{$labels.namespace}}/{{$labels.pod}} is experiencing {{ printf \"%.0f\" $value }} failures with LIST/WATCH requests to the Kubernetes API in the last 5 minutes.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusKubernetesListWatchFailures.md","summary":"Requests in Kubernetes SD are failing."},"alerts":[],"health":"ok","evaluationTime":0.000059074,"lastEvaluation":"2026-04-23T09:27:35.149584015Z","type":"alerting"},{"state":"inactive","name":"PrometheusNotificationQueueRunningFull","query":"(predict_linear(prometheus_notifications_queue_length{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m], 60 * 30) \u003e min_over_time(prometheus_notifications_queue_capacity{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]))","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.","summary":"Prometheus alert notification queue predicted to run full in less than 30m."},"alerts":[],"health":"ok","evaluationTime":0.000116999,"lastEvaluation":"2026-04-23T09:27:35.149645535Z","type":"alerting"},{"state":"inactive","name":"PrometheusErrorSendingAlertsToSomeAlertmanagers","query":"(rate(prometheus_notifications_errors_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) / rate(prometheus_notifications_sent_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m])) * 100 \u003e 1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ printf \"%.1f\" $value }}% of alerts sent by Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}} were affected by errors.","summary":"More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors."},"alerts":[],"health":"ok","evaluationTime":0.000133295,"lastEvaluation":"2026-04-23T09:27:35.149765517Z","type":"alerting"},{"state":"inactive","name":"PrometheusNotConnectedToAlertmanagers","query":"max_over_time(prometheus_notifications_alertmanagers_discovered{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003c 1","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.","summary":"Prometheus is not connected to any Alertmanagers."},"alerts":[],"health":"ok","evaluationTime":0.00005706,"lastEvaluation":"2026-04-23T09:27:35.1499018Z","type":"alerting"},{"state":"inactive","name":"PrometheusTSDBReloadsFailing","query":"increase(prometheus_tsdb_reloads_failures_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[3h]) \u003e 0","duration":14400,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.","summary":"Prometheus has issues reloading blocks from disk."},"alerts":[],"health":"ok","evaluationTime":0.000063218,"lastEvaluation":"2026-04-23T09:27:35.149962613Z","type":"alerting"},{"state":"inactive","name":"PrometheusTSDBCompactionsFailing","query":"increase(prometheus_tsdb_compactions_failed_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[3h]) \u003e 0","duration":14400,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.","summary":"Prometheus has issues compacting blocks."},"alerts":[],"health":"ok","evaluationTime":0.000056199,"lastEvaluation":"2026-04-23T09:27:35.150028289Z","type":"alerting"},{"state":"inactive","name":"PrometheusNotIngestingSamples","query":"(sum without (type) (rate(prometheus_tsdb_head_samples_appended_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m])) \u003c= 0 and (sum without (scrape_job) (prometheus_target_metadata_cache_entries{job=~\"prometheus-k8s|prometheus-user-workload\"}) \u003e 0 or sum without (rule_group) (prometheus_rule_group_rules{job=~\"prometheus-k8s|prometheus-user-workload\"}) \u003e 0))","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.","summary":"Prometheus is not ingesting samples."},"alerts":[],"health":"ok","evaluationTime":0.00048282,"lastEvaluation":"2026-04-23T09:27:35.150086943Z","type":"alerting"},{"state":"inactive","name":"PrometheusDuplicateTimestamps","query":"rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf \"%.4g\" $value  }} samples/s with different values but duplicated timestamp.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusDuplicateTimestamps.md","summary":"Prometheus is dropping samples with duplicate timestamps."},"alerts":[],"health":"ok","evaluationTime":0.00007325,"lastEvaluation":"2026-04-23T09:27:35.150573469Z","type":"alerting"},{"state":"inactive","name":"PrometheusOutOfOrderTimestamps","query":"rate(prometheus_target_scrapes_sample_out_of_order_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf \"%.4g\" $value  }} samples/s with timestamps arriving out of order.","summary":"Prometheus drops samples with out-of-order timestamps."},"alerts":[],"health":"ok","evaluationTime":0.000053698,"lastEvaluation":"2026-04-23T09:27:35.15065018Z","type":"alerting"},{"state":"inactive","name":"PrometheusRemoteStorageFailures","query":"((rate(prometheus_remote_storage_failed_samples_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m])) / ((rate(prometheus_remote_storage_failed_samples_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m])) + (rate(prometheus_remote_storage_succeeded_samples_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) or rate(prometheus_remote_storage_samples_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m])))) * 100 \u003e 1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf \"%.1f\" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusRemoteStorageFailures.md","summary":"Prometheus fails to send samples to remote storage."},"alerts":[],"health":"ok","evaluationTime":0.000205596,"lastEvaluation":"2026-04-23T09:27:35.150706911Z","type":"alerting"},{"state":"inactive","name":"PrometheusRemoteWriteBehind","query":"(max_over_time(prometheus_remote_storage_queue_highest_timestamp_seconds{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) - max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m])) \u003e 120","duration":900,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf \"%.1f\" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.","summary":"Prometheus remote write is behind."},"alerts":[],"health":"ok","evaluationTime":0.000078484,"lastEvaluation":"2026-04-23T09:27:35.150915729Z","type":"alerting"},{"state":"inactive","name":"PrometheusRemoteWriteDesiredShards","query":"(max_over_time(prometheus_remote_storage_shards_desired{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e max_over_time(prometheus_remote_storage_shards_max{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]))","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance=\"%s\",job=~\"prometheus-k8s|prometheus-user-workload\"}` $labels.instance | query | first | value }}.","summary":"Prometheus remote write desired shards calculation wants to run more than configured max shards."},"alerts":[],"health":"ok","evaluationTime":0.000063742,"lastEvaluation":"2026-04-23T09:27:35.150996854Z","type":"alerting"},{"state":"inactive","name":"PrometheusRuleFailures","query":"increase(prometheus_rule_evaluation_failures_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf \"%.0f\" $value }} rules in the last 5m.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusRuleFailures.md","summary":"Prometheus is failing rule evaluations."},"alerts":[],"health":"ok","evaluationTime":0.000235122,"lastEvaluation":"2026-04-23T09:27:35.151063034Z","type":"alerting"},{"state":"inactive","name":"PrometheusMissingRuleEvaluations","query":"increase(prometheus_rule_group_iterations_missed_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf \"%.0f\" $value }} rule group evaluations in the last 5m.","summary":"Prometheus is missing rule evaluations due to slow rule group evaluation."},"alerts":[],"health":"ok","evaluationTime":0.000283306,"lastEvaluation":"2026-04-23T09:27:35.151313425Z","type":"alerting"},{"state":"inactive","name":"PrometheusTargetLimitHit","query":"increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf \"%.0f\" $value }} targets because the number of targets exceeded the configured target_limit.","summary":"Prometheus has dropped targets because some scrape configs have exceeded the targets limit."},"alerts":[],"health":"ok","evaluationTime":0.000060657,"lastEvaluation":"2026-04-23T09:27:35.15159966Z","type":"alerting"},{"state":"inactive","name":"PrometheusLabelLimitHit","query":"increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf \"%.0f\" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.","summary":"Prometheus has dropped targets because some scrape configs have exceeded the labels limit."},"alerts":[],"health":"ok","evaluationTime":0.000069535,"lastEvaluation":"2026-04-23T09:27:35.151662685Z","type":"alerting"},{"state":"inactive","name":"PrometheusScrapeBodySizeLimitHit","query":"increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf \"%.0f\" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusScrapeBodySizeLimitHit.md","summary":"Prometheus has dropped some targets that exceeded body size limit."},"alerts":[],"health":"ok","evaluationTime":0.000057504,"lastEvaluation":"2026-04-23T09:27:35.151735053Z","type":"alerting"},{"state":"inactive","name":"PrometheusScrapeSampleLimitHit","query":"increase(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf \"%.0f\" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.","summary":"Prometheus has failed scrapes that have exceeded the configured sample limit."},"alerts":[],"health":"ok","evaluationTime":0.000052621,"lastEvaluation":"2026-04-23T09:27:35.151794888Z","type":"alerting"},{"state":"inactive","name":"PrometheusTargetSyncFailure","query":"increase(prometheus_target_sync_failed_total{job=~\"prometheus-k8s|prometheus-user-workload\"}[30m]) \u003e 0","duration":300,"keepFiringFor":0,"labels":{"severity":"critical"},"annotations":{"description":"{{ printf \"%.0f\" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusTargetSyncFailure.md","summary":"Prometheus has failed to sync targets."},"alerts":[],"health":"ok","evaluationTime":0.000171281,"lastEvaluation":"2026-04-23T09:27:35.151849915Z","type":"alerting"},{"state":"inactive","name":"PrometheusHighQueryLoad","query":"avg_over_time(prometheus_engine_queries{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job=~\"prometheus-k8s|prometheus-user-workload\"}[5m]) \u003e 0.8","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes.","summary":"Prometheus is reaching its maximum capacity serving concurrent requests."},"alerts":[],"health":"ok","evaluationTime":0.000102834,"lastEvaluation":"2026-04-23T09:27:35.152023572Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.002877518,"lastEvaluation":"2026-04-23T09:27:35.149251109Z"},{"name":"thanos-sidecar","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-prometheus-k8s-thanos-sidecar-rules-2c9d8fef-7005-483f-bc07-b5bbe0f846bc.yaml","rules":[{"state":"inactive","name":"ThanosSidecarBucketOperationsFailed","query":"sum by (namespace, job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"prometheus-(k8s|user-workload)-thanos-sidecar\"}[5m])) \u003e 0","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Sidecar {{$labels.instance}} in {{$labels.namespace}} bucket operations are failing","summary":"Thanos Sidecar bucket operations are failing"},"alerts":[],"health":"ok","evaluationTime":0.000287412,"lastEvaluation":"2026-04-23T09:27:53.915479355Z","type":"alerting"},{"state":"inactive","name":"ThanosSidecarNoConnectionToStartedPrometheus","query":"thanos_sidecar_prometheus_up{job=~\"prometheus-(k8s|user-workload)-thanos-sidecar\"} == 0 and on (namespace, pod) prometheus_tsdb_data_replay_duration_seconds != 0","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Sidecar {{$labels.instance}} in {{$labels.namespace}} is unhealthy.","summary":"Thanos Sidecar cannot access Prometheus, even though Prometheus seems healthy and has reloaded WAL."},"alerts":[],"health":"ok","evaluationTime":0.000144106,"lastEvaluation":"2026-04-23T09:27:53.915771628Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000462769,"lastEvaluation":"2026-04-23T09:27:53.915455181Z"},{"name":"config-reloaders","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-prometheus-operator-rules-ab6d0731-4033-4242-bfdc-bc107271e2a4.yaml","rules":[{"state":"inactive","name":"ConfigReloaderSidecarErrors","query":"max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m]) == 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.\nAs a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.","summary":"config-reloader sidecar has not had a successful reload for 10m"},"alerts":[],"health":"ok","evaluationTime":0.000268502,"lastEvaluation":"2026-04-23T09:27:51.470203849Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000297141,"lastEvaluation":"2026-04-23T09:27:51.470177781Z"},{"name":"prometheus-operator","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-prometheus-operator-rules-ab6d0731-4033-4242-bfdc-bc107271e2a4.yaml","rules":[{"state":"inactive","name":"PrometheusOperatorListErrors","query":"(sum by (cluster, controller, namespace) (rate(prometheus_operator_list_operations_failed_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[10m])) / sum by (cluster, controller, namespace) (rate(prometheus_operator_list_operations_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[10m]))) \u003e 0.4","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.","summary":"Errors while performing list operations in controller."},"alerts":[],"health":"ok","evaluationTime":0.000391593,"lastEvaluation":"2026-04-23T09:27:46.044479645Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorWatchErrors","query":"(sum by (cluster, controller, namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m])) / sum by (cluster, controller, namespace) (rate(prometheus_operator_watch_operations_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]))) \u003e 0.4","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.","summary":"Errors while performing watch operations in controller."},"alerts":[],"health":"ok","evaluationTime":0.000185944,"lastEvaluation":"2026-04-23T09:27:46.044876397Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorSyncFailed","query":"min_over_time(prometheus_operator_syncs{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\",status=\"failed\"}[5m]) \u003e 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.","summary":"Last controller reconciliation failed"},"alerts":[],"health":"ok","evaluationTime":0.000079055,"lastEvaluation":"2026-04-23T09:27:46.04506554Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorReconcileErrors","query":"(sum by (cluster, controller, namespace) (rate(prometheus_operator_reconcile_errors_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]))) / (sum by (cluster, controller, namespace) (rate(prometheus_operator_reconcile_operations_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]))) \u003e 0.1","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.","summary":"Errors while reconciling objects."},"alerts":[],"health":"ok","evaluationTime":0.000175597,"lastEvaluation":"2026-04-23T09:27:46.04514711Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorStatusUpdateErrors","query":"(sum by (cluster, controller, namespace) (rate(prometheus_operator_status_update_errors_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]))) / (sum by (cluster, controller, namespace) (rate(prometheus_operator_status_update_operations_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]))) \u003e 0.1","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"{{ $value | humanizePercentage }} of status update operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.","summary":"Errors while updating objects status."},"alerts":[],"health":"ok","evaluationTime":0.000223506,"lastEvaluation":"2026-04-23T09:27:46.045325488Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorNodeLookupErrors","query":"rate(prometheus_operator_node_address_lookup_errors_total{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]) \u003e 0.1","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.","summary":"Errors while reconciling Prometheus."},"alerts":[],"health":"ok","evaluationTime":0.000074329,"lastEvaluation":"2026-04-23T09:27:46.045552641Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorNotReady","query":"min by (cluster, controller, namespace) (max_over_time(prometheus_operator_ready{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\"}[5m]) == 0)","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.","summary":"Prometheus operator not ready"},"alerts":[],"health":"ok","evaluationTime":0.000092048,"lastEvaluation":"2026-04-23T09:27:46.045630244Z","type":"alerting"},{"state":"inactive","name":"PrometheusOperatorRejectedResources","query":"min_over_time(prometheus_operator_managed_resources{job=\"prometheus-operator\",namespace=~\"openshift-monitoring|openshift-user-workload-monitoring\",state=\"rejected\"}[5m]) \u003e 0","duration":300,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf \"%0.0f\" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/PrometheusOperatorRejectedResources.md","summary":"Resources rejected by Prometheus operator"},"alerts":[],"health":"ok","evaluationTime":0.000085955,"lastEvaluation":"2026-04-23T09:27:46.04572498Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.001356097,"lastEvaluation":"2026-04-23T09:27:46.044457146Z"},{"name":"telemeter.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-telemetry-51017fed-0955-4faa-91b8-cfcacd1b882b.yaml","rules":[{"name":"cluster:telemetry_selected_series:count","query":"max(federate_samples - federate_filtered_samples)","health":"ok","evaluationTime":0.000269584,"lastEvaluation":"2026-04-23T09:27:54.384185891Z","type":"recording"},{"state":"inactive","name":"TelemeterClientFailures","query":"sum by (namespace) (rate(federate_requests_failed_total{job=\"telemeter-client\"}[15m])) / sum by (namespace) (rate(federate_requests_total{job=\"telemeter-client\"}[15m])) \u003e 0.2","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The telemeter client in namespace {{ $labels.namespace }} fails {{ $value | humanize }} of the requests to the telemeter service.\nCheck the logs of the telemeter-client pod with the following command:\noc logs -n openshift-monitoring deployment.apps/telemeter-client -c telemeter-client\nIf the telemeter client fails to authenticate with the telemeter service, make sure that the global pull secret is up to date, see https://docs.openshift.com/container-platform/latest/openshift_images/managing_images/using-image-pull-secrets.html#images-update-global-pull-secret_using-image-pull-secrets for more details.","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/TelemeterClientFailures.md","summary":"Telemeter client fails to send metrics"},"alerts":[],"health":"ok","evaluationTime":0.000182003,"lastEvaluation":"2026-04-23T09:27:54.384461394Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.00048246,"lastEvaluation":"2026-04-23T09:27:54.384163649Z"},{"name":"thanos-query","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-thanos-querier-81b7879e-0cb5-4305-9fb0-7468e0e55fe4.yaml","rules":[{"state":"inactive","name":"ThanosQueryHttpRequestQueryErrorRateHigh","query":"(sum by (namespace, job) (rate(http_requests_total{code=~\"5..\",handler=\"query\",job=\"thanos-querier\"}[5m])) / sum by (namespace, job) (rate(http_requests_total{handler=\"query\",job=\"thanos-querier\"}[5m]))) * 100 \u003e 5","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Query {{$labels.job}} in {{$labels.namespace}} is failing to handle {{$value | humanize}}% of \"query\" requests.","summary":"Thanos Query is failing to handle requests."},"alerts":[],"health":"ok","evaluationTime":0.000403493,"lastEvaluation":"2026-04-23T09:27:43.605745498Z","type":"alerting"},{"state":"inactive","name":"ThanosQueryHttpRequestQueryRangeErrorRateHigh","query":"(sum by (namespace, job) (rate(http_requests_total{code=~\"5..\",handler=\"query_range\",job=\"thanos-querier\"}[5m])) / sum by (namespace, job) (rate(http_requests_total{handler=\"query_range\",job=\"thanos-querier\"}[5m]))) * 100 \u003e 5","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Query {{$labels.job}} in {{$labels.namespace}} is failing to handle {{$value | humanize}}% of \"query_range\" requests.","summary":"Thanos Query is failing to handle requests."},"alerts":[],"health":"ok","evaluationTime":0.000151503,"lastEvaluation":"2026-04-23T09:27:43.606155007Z","type":"alerting"},{"state":"inactive","name":"ThanosQueryGrpcServerErrorRate","query":"(sum by (namespace, job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\",job=\"thanos-querier\"}[5m])) / sum by (namespace, job) (rate(grpc_server_started_total{job=\"thanos-querier\"}[5m])) * 100 \u003e 5)","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Query {{$labels.job}} in {{$labels.namespace}} is failing to handle {{$value | humanize}}% of requests.","summary":"Thanos Query is failing to handle requests."},"alerts":[],"health":"ok","evaluationTime":0.000434148,"lastEvaluation":"2026-04-23T09:27:43.60630989Z","type":"alerting"},{"state":"inactive","name":"ThanosQueryGrpcClientErrorRate","query":"(sum by (namespace, job) (rate(grpc_client_handled_total{grpc_code!=\"OK\",job=\"thanos-querier\"}[5m])) / sum by (namespace, job) (rate(grpc_client_started_total{job=\"thanos-querier\"}[5m]))) * 100 \u003e 5","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Query {{$labels.job}} in {{$labels.namespace}} is failing to send {{$value | humanize}}% of requests.","summary":"Thanos Query is failing to send requests."},"alerts":[],"health":"ok","evaluationTime":0.000134112,"lastEvaluation":"2026-04-23T09:27:43.606747708Z","type":"alerting"},{"state":"inactive","name":"ThanosQueryHighDNSFailures","query":"(sum by (namespace, job) (rate(thanos_query_store_apis_dns_failures_total{job=\"thanos-querier\"}[5m])) / sum by (namespace, job) (rate(thanos_query_store_apis_dns_lookups_total{job=\"thanos-querier\"}[5m]))) * 100 \u003e 1","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Query {{$labels.job}} in {{$labels.namespace}} have {{$value | humanize}}% of failing DNS queries for store endpoints.","summary":"Thanos Query is having high number of DNS failures."},"alerts":[],"health":"ok","evaluationTime":0.000104631,"lastEvaluation":"2026-04-23T09:27:43.60688495Z","type":"alerting"},{"state":"inactive","name":"ThanosQueryOverload","query":"(max_over_time(thanos_query_concurrent_gate_queries_max[5m]) - avg_over_time(thanos_query_concurrent_gate_queries_in_flight[5m]) \u003c 1)","duration":3600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Thanos Query {{$labels.job}} in {{$labels.namespace}} has been overloaded for more than 15 minutes. This may be a symptom of excessive simultaneous complex requests, low performance of the Prometheus API, or failures within these components. Assess the health of the Thanos query instances, the connected Prometheus instances, look for potential senders of these requests and then contact support.","summary":"Thanos query reaches its maximum capacity serving concurrent requests."},"alerts":[],"health":"ok","evaluationTime":0.000114982,"lastEvaluation":"2026-04-23T09:27:43.606992391Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.001403846,"lastEvaluation":"2026-04-23T09:27:43.605706027Z"},{"name":"openshift-network.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-network-operator-openshift-network-operator-ipsec-rules-0564be43-1b76-4aeb-9b85-f8837a58bfb7.yaml","rules":[{"name":"openshift:openshift_network_operator_ipsec_state:info","query":"group by (mode, is_legacy_api) (openshift_network_operator_ipsec_state{namespace=~\"openshift-network-operator\"})","health":"ok","evaluationTime":0.000161366,"lastEvaluation":"2026-04-23T09:27:44.415908139Z","type":"recording"}],"interval":30,"limit":0,"evaluationTime":0.000188689,"lastEvaluation":"2026-04-23T09:27:44.415883376Z"},{"name":"olm.csv_abnormal.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-operator-lifecycle-manager-olm-alert-rules-f54e416a-c599-49d8-b3c1-20d0e9252d86.yaml","rules":[{"state":"inactive","name":"CsvAbnormalFailedOver2Min","query":"last_over_time(csv_abnormal{phase=\"Failed\"}[5m])","duration":120,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Failed to install Operator {{ $labels.name }} version {{ $labels.version }}. Reason-{{ $labels.reason }}","summary":"CSV failed for over 2 minutes"},"alerts":[],"health":"ok","evaluationTime":0.000138881,"lastEvaluation":"2026-04-23T09:27:35.300760365Z","type":"alerting"},{"state":"inactive","name":"CsvAbnormalOver30Min","query":"last_over_time(csv_abnormal{phase=~\"(Replacing|Pending|Deleting|Unknown)\"}[5m])","duration":1800,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Failed to install Operator {{ $labels.name }} version {{ $labels.version }}. Phase-{{ $labels.phase }} Reason-{{ $labels.reason }}","summary":"CSV abnormal for over 30 minutes"},"alerts":[],"health":"ok","evaluationTime":0.000104601,"lastEvaluation":"2026-04-23T09:27:35.300903082Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000270126,"lastEvaluation":"2026-04-23T09:27:35.300739679Z"},{"name":"olm.installplan.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-operator-lifecycle-manager-olm-alert-rules-f54e416a-c599-49d8-b3c1-20d0e9252d86.yaml","rules":[{"state":"inactive","name":"InstallPlanStepAppliedWithWarnings","query":"sum by (namespace) (increase(installplan_warnings_total[5m])) \u003e 0","duration":0,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"The API server returned a warning during installation or upgrade of an operator. An Event with reason \"AppliedWithWarnings\" has been created with complete details, including a reference to the InstallPlan step that generated the warning.","summary":"API returned a warning when modifying an operator"},"alerts":[],"health":"ok","evaluationTime":0.000204802,"lastEvaluation":"2026-04-23T09:27:53.074496336Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.000231381,"lastEvaluation":"2026-04-23T09:27:53.074472328Z"},{"name":"cluster-network-operator-ovn.rules","file":"/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-ovn-kubernetes-networking-rules-e50fbfc3-cc5f-4a1f-ac32-58c8877d7bbb.yaml","rules":[{"state":"inactive","name":"NodeWithoutOVNKubeNodePodRunning","query":"(kube_node_info unless on (node) (kube_pod_info{namespace=\"openshift-ovn-kubernetes\",pod=~\"ovnkube-node.*\"} or kube_node_labels{label_kubernetes_io_os=\"windows\"})) \u003e 0","duration":1200,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Networking is degraded on nodes that do not have a functioning ovnkube-node pod. Existing workloads on the\nnode may continue to have connectivity but any changes to the networking control plane will not be implemented.\n","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/NodeWithoutOVNKubeNodePodRunning.md","summary":"All Linux nodes should be running an ovnkube-node pod, {{ $labels.node }} is not."},"alerts":[],"health":"ok","evaluationTime":0.000311995,"lastEvaluation":"2026-04-23T09:27:31.736409502Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesControllerDisconnectedSouthboundDatabase","query":"max_over_time(ovn_controller_southbound_database_connected[5m]) == 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Networking is degraded on nodes when OVN controller is not connected to OVN southbound database connection. No networking control plane updates will be applied to the node.\n","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/OVNKubernetesControllerDisconnectedSouthboundDatabase.md","summary":"Networking control plane is degraded on node {{ $labels.node }} because OVN controller is not connected to OVN southbound database."},"alerts":[],"health":"ok","evaluationTime":0.000084008,"lastEvaluation":"2026-04-23T09:27:31.736727048Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesNodePodAddError","query":"(sum by (instance, namespace) (rate(ovnkube_node_cni_request_duration_seconds_count{command=\"ADD\",err=\"true\"}[5m])) / sum by (instance, namespace) (rate(ovnkube_node_cni_request_duration_seconds_count{command=\"ADD\"}[5m]))) \u003e 0.1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"OVN Kubernetes experiences pod creation errors at an elevated rate. The pods will be retried.","summary":"OVN Kubernetes is experiencing pod creation errors at an elevated rate."},"alerts":[],"health":"ok","evaluationTime":0.00014447,"lastEvaluation":"2026-04-23T09:27:31.736814212Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesNodePodDeleteError","query":"(sum by (instance, namespace) (rate(ovnkube_node_cni_request_duration_seconds_count{command=\"DEL\",err=\"true\"}[5m])) / sum by (instance, namespace) (rate(ovnkube_node_cni_request_duration_seconds_count{command=\"DEL\"}[5m]))) \u003e 0.1","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"OVN Kubernetes experiences pod deletion errors at an elevated rate. The pods will be retried.","summary":"OVN Kubernetes experiencing pod deletion errors at an elevated rate."},"alerts":[],"health":"ok","evaluationTime":0.000120452,"lastEvaluation":"2026-04-23T09:27:31.736961762Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesResourceRetryFailure","query":"increase(ovnkube_resource_retry_failures_total[10m]) \u003e 0","duration":0,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"OVN Kubernetes failed to apply networking control plane configuration after several attempts. This might be because the configuration\nprovided by the user is invalid or because of an internal error. As a consequence, the cluster might have a degraded status.\n","summary":"OVN Kubernetes failed to apply networking control plane configuration."},"alerts":[],"health":"ok","evaluationTime":0.000067523,"lastEvaluation":"2026-04-23T09:27:31.737084905Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesNodeOVSOverflowUserspace","query":"increase(ovs_vswitchd_netlink_overflow[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Netlink messages dropped by OVS vSwitch daemon due to netlink socket buffer overflow. This will result in packet loss.","summary":"OVS vSwitch daemon drops packets due to buffer overflow."},"alerts":[],"health":"ok","evaluationTime":0.000064696,"lastEvaluation":"2026-04-23T09:27:31.737155073Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesNodeOVSOverflowKernel","query":"increase(ovs_vswitchd_dp_flows_lookup_lost[5m]) \u003e 0","duration":900,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"Netlink messages dropped by OVS kernel module due to netlink socket buffer overflow. This will result in packet loss.","summary":"OVS kernel module drops packets due to buffer overflow."},"alerts":[],"health":"ok","evaluationTime":0.000061941,"lastEvaluation":"2026-04-23T09:27:31.737223665Z","type":"alerting"},{"state":"inactive","name":"NorthboundStale","query":"time() - max_over_time(ovnkube_controller_nb_e2e_timestamp[5m]) \u003e 120","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"OVN-Kubernetes controller and/or OVN northbound database may cause a\ndegraded networking control plane for the affected node. Existing\nworkloads should continue to have connectivity but new workloads may\nbe impacted.\n","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/NorthboundStaleAlert.md","summary":"OVN-Kubernetes controller {{ $labels.instance }} has not successfully synced any changes to the northbound database for too long."},"alerts":[],"health":"ok","evaluationTime":0.000128355,"lastEvaluation":"2026-04-23T09:27:31.737288873Z","type":"alerting"},{"state":"inactive","name":"SouthboundStale","query":"max_over_time(ovnkube_controller_nb_e2e_timestamp[5m]) - max_over_time(ovnkube_controller_sb_e2e_timestamp[5m]) \u003e 120","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"OVN-Kubernetes controller and/or OVN northbound database may cause a\ndegraded networking control plane for the affected node. Existing\nworkloads should continue to have connectivity but new workloads may\nbe impacted.\n","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/SouthboundStaleAlert.md","summary":"OVN northd {{ $labels.instance }} has not successfully synced any changes to the southbound database for too long."},"alerts":[],"health":"ok","evaluationTime":0.000117995,"lastEvaluation":"2026-04-23T09:27:31.737420897Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesNorthboundDatabaseCPUUsageHigh","query":"(sum by (instance, name, namespace) (rate(container_cpu_usage_seconds_total{container=\"nbdb\"}[5m]))) \u003e 0.8","duration":900,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"High OVN northbound CPU usage indicates high load on the networking\ncontrol plane for the affected node.\n","summary":"OVN northbound database {{ $labels.instance }} is greater than {{ $value | humanizePercentage }} percent CPU usage for a period of time."},"alerts":[],"health":"ok","evaluationTime":0.000101315,"lastEvaluation":"2026-04-23T09:27:31.737542238Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesSouthboundDatabaseCPUUsageHigh","query":"(sum by (instance, name, namespace) (rate(container_cpu_usage_seconds_total{container=\"sbdb\"}[5m]))) \u003e 0.8","duration":900,"keepFiringFor":0,"labels":{"severity":"info"},"annotations":{"description":"High OVN southbound CPU usage indicates high load on the networking\ncontrol plane for the affected node.\n","summary":"OVN southbound database {{ $labels.instance }} is greater than {{ $value | humanizePercentage }} percent CPU usage for a period of time."},"alerts":[],"health":"ok","evaluationTime":0.000108262,"lastEvaluation":"2026-04-23T09:27:31.737647323Z","type":"alerting"},{"state":"inactive","name":"OVNKubernetesNorthdInactive","query":"count by (instance, name, namespace) (ovn_northd_status != 1) \u003e 0","duration":600,"keepFiringFor":0,"labels":{"severity":"warning"},"annotations":{"description":"An inactive OVN northd instance may cause a degraded networking\ncontrol plane for the affected node. Existing workloads should\ncontinue to have connectivity but new workloads may be impacted.\n","runbook_url":"https://github.com/openshift/runbooks/blob/master/alerts/cluster-network-operator/OVNKubernetesNorthdInactive.md","summary":"OVN northd {{ $labels.instance }} is not active."},"alerts":[],"health":"ok","evaluationTime":0.000068892,"lastEvaluation":"2026-04-23T09:27:31.737758487Z","type":"alerting"}],"interval":30,"limit":0,"evaluationTime":0.001440803,"lastEvaluation":"2026-04-23T09:27:31.736389045Z"}]}}