<?xml version="1.0" encoding="utf-8"?><testsuites name="pytest tests"><testsuite name="pytest" errors="0" failures="3" skipped="2" tests="152" time="1365.199" timestamp="2026-06-05T18:17:56.794941+00:00" hostname="maas-group-test-8mz9q-e2e-maas-openshift-pod"><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyCRUD" name="test_create_api_key" time="0.108" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyCRUD" name="test_list_api_keys" time="0.138" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyCRUD" name="test_revoke_api_key" time="0.098" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyAuthorization" name="test_admin_manage_other_users_keys" time="0.126" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyAuthorization" name="test_non_admin_cannot_access_other_users_keys" time="0.092" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyBulkOperations" name="test_bulk_revoke_own_keys" time="0.242" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyBulkOperations" name="test_bulk_revoke_other_user_forbidden" time="0.031" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyBulkOperations" name="test_bulk_revoke_admin_can_revoke_any_user" time="0.087" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyExpiration" name="test_create_key_within_expiration_limit" time="0.029" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyExpiration" name="test_create_key_at_expiration_limit" time="0.029" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyExpiration" name="test_create_key_exceeds_expiration_limit" time="0.030" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyExpiration" name="test_create_key_without_expiration" time="0.032" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyExpiration" name="test_create_key_with_short_expiration" time="0.036" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyModelInference" name="test_api_key_model_access_success" time="0.099" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyModelInference" name="test_invalid_api_key_rejected" time="0.021" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyModelInference" name="test_no_auth_header_rejected" time="0.019" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyModelInference" name="test_revoked_api_key_rejected" time="2.121" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyModelInference" name="test_api_key_chat_completions" time="0.040" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyRevocationE2E" name="test_double_revoke_returns_404" time="0.095" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyRevocationE2E" name="test_revoke_nonexistent_key_returns_404" time="0.036" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyRevocationE2E" name="test_revoke_then_create_new_key_works" time="0.142" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyRevocationE2E" name="test_individual_revoke_multiple_keys" time="0.178" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeyRevocationE2E" name="test_revoke_keys_rejected_at_gateway" time="0.298" /><testcase classname="test.e2e.tests.test_api_keys.TestEphemeralKeyCleanup" name="test_cronjob_exists_and_configured" time="0.108" /><testcase classname="test.e2e.tests.test_api_keys.TestEphemeralKeyCleanup" name="test_cleanup_networkpolicy_exists" time="0.108" /><testcase classname="test.e2e.tests.test_api_keys.TestEphemeralKeyCleanup" name="test_create_ephemeral_key" time="0.171" /><testcase classname="test.e2e.tests.test_api_keys.TestEphemeralKeyCleanup" name="test_trigger_cleanup_preserves_active_keys" time="0.528" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionPhases" name="test_create_key_for_active_subscription" time="9.243" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionPhases" name="test_create_key_for_degraded_subscription" time="19.166" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionPhases" name="test_create_key_for_failed_subscription" time="19.282" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionPhases" name="test_create_key_for_pending_subscription" time="19.278" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionPhases" name="test_reject_key_for_unreconciled_subscription" time="22.498" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionFilter" name="test_search_filters_by_subscription" time="14.339" /><testcase classname="test.e2e.tests.test_api_keys.TestAPIKeySubscriptionFilter" name="test_search_without_subscription_returns_all" time="0.164" /><testcase classname="test.e2e.tests.test_namespace_scoping.TestMaaSAPIWatchNamespace" name="test_subscription_in_subscription_namespace_visible_to_api" time="8.516" /><testcase classname="test.e2e.tests.test_namespace_scoping.TestMaaSAPIWatchNamespace" name="test_subscription_in_another_namespace_not_visible_to_api" time="22.641" /><testcase classname="test.e2e.tests.test_namespace_scoping.TestMaaSControllerWatchNamespace" name="test_authpolicy_and_subscription_in_maas_subscription_namespace" time="23.953" /><testcase classname="test.e2e.tests.test_namespace_scoping.TestMaaSControllerWatchNamespace" name="test_authpolicy_and_subscription_in_another_namespace" time="30.523" /><testcase classname="test.e2e.tests.test_namespace_scoping.TestModelRef" name="test_auth_policy_model_ref" time="31.370" /><testcase classname="test.e2e.tests.test_namespace_scoping.TestModelRef" name="test_subscription_model_ref" time="31.825" /><testcase classname="test.e2e.tests.test_negative_security.TestHeaderSpoofing" name="test_injected_identity_headers_ignored" time="0.077" /><testcase classname="test.e2e.tests.test_negative_security.TestHeaderSpoofing" name="test_duplicate_subscription_headers_ignored" time="0.064" /><testcase classname="test.e2e.tests.test_negative_security.TestExpiredKeyRejection" name="test_expired_key_rejected_at_gateway" time="5.068" /><testcase classname="test.e2e.tests.test_negative_security.TestCrossModelAccess" name="test_key_cannot_access_model_outside_subscription" time="0.053" /><testcase classname="test.e2e.tests.test_negative_security.TestAuthPolicyRemoval" name="test_authpolicy_deletion_revokes_access" time="5.018" /><testcase classname="test.e2e.tests.test_negative_security.TestMissingModelRef" name="test_subscription_with_nonexistent_model_ref" time="0.994" /><testcase classname="test.e2e.tests.test_negative_security.TestMissingModelRef" name="test_authpolicy_with_nonexistent_model_ref" time="0.662" /><testcase classname="test.e2e.tests.test_negative_security.TestHeaderAbuse" name="test_special_characters_in_subscription_header" time="0.148" /><testcase classname="test.e2e.tests.test_subscription.TestAuthEnforcement" name="test_authorized_user_gets_200" time="0.055" /><testcase classname="test.e2e.tests.test_subscription.TestAuthEnforcement" name="test_no_auth_gets_401" time="0.017" /><testcase classname="test.e2e.tests.test_subscription.TestAuthEnforcement" name="test_invalid_token_gets_403" time="0.017" /><testcase classname="test.e2e.tests.test_subscription.TestAuthEnforcement" name="test_wrong_group_gets_403" time="0.022" /><testcase classname="test.e2e.tests.test_subscription.TestAPIKeySubscriptionBinding" name="test_create_api_key_uses_highest_priority_subscription" time="0.281" /><testcase classname="test.e2e.tests.test_subscription.TestAPIKeySubscriptionBinding" name="test_create_api_key_with_explicit_simulator_subscription" time="0.064" /><testcase classname="test.e2e.tests.test_subscription.TestAPIKeySubscriptionBinding" name="test_create_api_key_nonexistent_subscription_errors" time="0.239" /><testcase classname="test.e2e.tests.test_subscription.TestSubscriptionEnforcement" name="test_subscribed_user_gets_200" time="0.029" /><testcase classname="test.e2e.tests.test_subscription.TestSubscriptionEnforcement" name="test_auth_pass_no_subscription_gets_403" time="16.388" /><testcase classname="test.e2e.tests.test_subscription.TestSubscriptionEnforcement" name="test_rate_limit_exhaustion_gets_429" time="24.889"><failure message="AssertionError: Unexpected status 503 at request 1: no healthy upstream">self = &lt;test_subscription.TestSubscriptionEnforcement object at 0x7fee6a198cd0&gt;

    def test_rate_limit_exhaustion_gets_429(self):
        """
        Test that a user gets 429 when they actually exceed their token rate limit.
    
        This test creates a dedicated subscription with a very low token limit,
        sends enough requests to exhaust it, and verifies a 429 response.
    
        Uses the unconfigured model to avoid interfering with other tests.
        """
        # Use unconfigured model to isolate this test
        model_ref = UNCONFIGURED_MODEL_REF
        model_path = UNCONFIGURED_MODEL_PATH
    
        # Create unique subscription and auth policy names
        auth_policy_name = "e2e-rate-limit-test-auth"
        subscription_name = "e2e-rate-limit-test-subscription"
    
        # Low limit so we exhaust it quickly. Actual tokens consumed per
        # response are non-deterministic (max_tokens is a ceiling, not exact),
        # so we send enough requests to be confident we hit the limit without
        # asserting exactly when the 429 arrives.
        token_limit = 10
        window = "1m"
        total_requests = 15
    
        try:
            # 1. Create auth policy allowing system:authenticated
            _create_test_auth_policy(
                name=auth_policy_name,
                model_refs=[model_ref],
                groups=["system:authenticated"]
            )
            _wait_reconcile()
    
            # 2. Create subscription with low token limit
            _create_test_subscription(
                name=subscription_name,
                model_refs=[model_ref],
                groups=["system:authenticated"],
                token_limit=token_limit,
                window=window
            )
            _wait_reconcile()
    
            # Wait for TRLP to be created AND enforced by Kuadrant/Limitador.
            # Without this, requests bypass token rate limiting entirely.
            _wait_for_token_rate_limit_policy(model_ref, model_namespace=MODEL_NAMESPACE, timeout=90)
    
            # 3. API key must be minted for this subscription
            oc_token = _get_cluster_token()
            api_key = _create_api_key(
                oc_token,
                name=f"e2e-rate-limit-{uuid.uuid4().hex[:8]}",
                subscription=subscription_name,
            )
    
            # 4. Send requests to exhaust the limit
            rate_limited = False
            success_count = 0
    
            for i in range(total_requests):
                r = _inference(api_key, path=model_path, max_tokens=1)
                request_num = i + 1
                log.info(f"Request {request_num}/{total_requests}: {r.status_code}")
    
                if r.status_code == 200:
                    success_count += 1
                elif r.status_code == 429:
                    rate_limited = True
                    log.info(f"Rate limit exceeded after {success_count} successful requests")
    
                    # Verify it's a rate limit 429, not a subscription error
                    response_text = r.text.lower() if r.text else ""
                    # Rate limit 429s typically mention "rate", "limit", or "quota"
                    # Subscription 429s mention "subscription" without "rate"
                    is_rate_limit_error = any(keyword in response_text
                                             for keyword in ["rate", "limit", "quota", "too many"])
                    is_subscription_error = "subscription" in response_text and not is_rate_limit_error
    
                    assert is_rate_limit_error or not is_subscription_error, \
                        f"Expected rate limit 429, not subscription error. Response: {r.text[:500]}"
    
                    # Check for Retry-After header (optional but good practice)
                    retry_after = r.headers.get("Retry-After") or r.headers.get("retry-after")
                    if retry_after:
                        log.info(f"Retry-After header present: {retry_after}")
    
                    break
                else:
                    # Unexpected status code
&gt;                   raise AssertionError(f"Unexpected status {r.status_code} at request {request_num}: {r.text[:200]}")
E                   AssertionError: Unexpected status 503 at request 1: no healthy upstream

test/e2e/tests/test_subscription.py:506: AssertionError</failure></testcase><testcase classname="test.e2e.tests.test_subscription.TestSubscriptionEnforcement" name="test_models_endpoint_exempt_from_rate_limiting" time="25.218"><failure message="AssertionError: Expected to hit rate limit within 5 requests with 3 token limit, but got 0 successful requests without hitting limit&#10;assert False">self = &lt;test_subscription.TestSubscriptionEnforcement object at 0x7fee6a198880&gt;

    def test_models_endpoint_exempt_from_rate_limiting(self):
        """
        Test that /v1/models endpoint remains accessible when token quota is exhausted.
    
        This verifies that users can discover model capabilities even when they've
        used all their inference tokens. The /v1/models endpoint is a discovery/metadata
        endpoint that does not consume tokens and should remain accessible.
    
        Ref: https://issues.redhat.com/browse/RHOAIENG-46770
    
        Test steps:
        1. Create subscription with very low token limit (15 tokens)
        2. Exhaust the limit with inference requests (5 requests × 3 tokens = 15)
        3. Verify inference requests get 429 (rate limited)
        4. Verify /v1/models endpoint still returns 200 (not rate limited)
        """
        # Use unconfigured model to isolate this test
        model_ref = UNCONFIGURED_MODEL_REF
        model_path = UNCONFIGURED_MODEL_PATH
    
        # Create unique subscription and auth policy names
        auth_policy_name = "e2e-models-exempt-test-auth"
        subscription_name = "e2e-models-exempt-test-subscription"
    
        # Very low limit for fast, deterministic test
        # With 3 token limit and max_tokens=1, we're guaranteed to exhaust quota within 5 requests
        # (even if each request uses exactly 1 token: 5 requests &gt; 3 token limit)
        token_limit = 3
        window = "1m"
        max_tokens = 1
    
        try:
            # 1. Create auth policy allowing system:authenticated
            _create_test_auth_policy(
                name=auth_policy_name,
                model_refs=[model_ref],
                groups=["system:authenticated"]
            )
            _wait_reconcile()
            _wait_for_maas_auth_policy_phase(auth_policy_name, timeout=90)
    
            # 2. Create subscription with low token limit
            _create_test_subscription(
                name=subscription_name,
                model_refs=[model_ref],
                groups=["system:authenticated"],
                token_limit=token_limit,
                window=window
            )
            _wait_reconcile()
            _wait_for_maas_subscription_phase(subscription_name, timeout=90)
    
            # Wait for TRLP to be created AND enforced by Kuadrant/Limitador
            _wait_for_token_rate_limit_policy(model_ref, model_namespace=MODEL_NAMESPACE, timeout=90)
    
            # 3. Create API key for this subscription
            oc_token = _get_cluster_token()
            api_key = _create_api_key(
                oc_token,
                name=f"e2e-models-exempt-{uuid.uuid4().hex[:8]}",
                subscription=subscription_name,
            )
    
            # 4. Exhaust the token limit
            # With 3 token limit and 5 requests, we're guaranteed to hit the limit
            # (each successful request consumes ≥1 token, so 5 requests &gt; 3 token limit)
            max_requests = 5
            success_count = 0
            rate_limited = False
    
            log.info(f"Exhausting token quota: sending up to {max_requests} requests")
            for i in range(max_requests):
                r = _inference(api_key, path=model_path)
                request_num = i + 1
                log.info(f"Request {request_num}: status {r.status_code}")
    
                if r.status_code == 200:
                    success_count += 1
                elif r.status_code == 429:
                    log.info(f"Rate limit hit after {success_count} successful requests")
                    rate_limited = True
                    break
                else:
                    # Unexpected status during exhaustion
                    log.warning(f"Unexpected status during quota exhaustion: {r.status_code}")
    
            # Verify we hit rate limit (otherwise test setup is broken)
&gt;           assert rate_limited, \
                f"Expected to hit rate limit within {max_requests} requests with {token_limit} token limit, " \
                f"but got {success_count} successful requests without hitting limit"
E               AssertionError: Expected to hit rate limit within 5 requests with 3 token limit, but got 0 successful requests without hitting limit
E               assert False

test/e2e/tests/test_subscription.py:617: AssertionError</failure></testcase><testcase classname="test.e2e.tests.test_subscription.TestMultipleSubscriptionsPerModel" name="test_user_in_one_of_two_subscriptions_gets_200" time="8.361" /><testcase classname="test.e2e.tests.test_subscription.TestMultipleAuthPoliciesPerModel" name="test_two_auth_policies_or_logic" time="16.751" /><testcase classname="test.e2e.tests.test_subscription.TestMultipleAuthPoliciesPerModel" name="test_delete_one_auth_policy_other_still_works" time="24.513" /><testcase classname="test.e2e.tests.test_subscription.TestCascadeDeletion" name="test_delete_subscription_rebuilds_trlp" time="8.501" /><testcase classname="test.e2e.tests.test_subscription.TestCascadeDeletion" name="test_trlp_persists_during_multi_subscription_deletion" time="33.269" /><testcase classname="test.e2e.tests.test_subscription.TestCascadeDeletion" name="test_delete_last_subscription_denies_access" time="8.491" /><testcase classname="test.e2e.tests.test_subscription.TestCascadeDeletion" name="test_unconfigured_model_denied_by_gateway_auth" time="0.461" /><testcase classname="test.e2e.tests.test_subscription.TestOrderingEdgeCases" name="test_subscription_before_auth_policy" time="19.065" /><testcase classname="test.e2e.tests.test_subscription.TestManagedAnnotation" name="test_authpolicy_managed_false_prevents_update" time="20.872" /><testcase classname="test.e2e.tests.test_subscription.TestManagedAnnotation" name="test_trlp_managed_false_prevents_update" time="17.500" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_with_both_access_and_subscription_gets_200" time="11.728" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_with_access_but_no_subscription_gets_403" time="17.176" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_with_subscription_but_no_access_gets_403" time="17.759" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_single_subscription_auto_selects" time="17.485" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_multiple_subscriptions_separate_keys_gets_200" time="17.477" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_mint_api_key_denied_for_inaccessible_subscription" time="17.606" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_group_based_access_gets_200" time="17.080" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_group_based_auth_but_no_subscription_gets_403" time="17.176" /><testcase classname="test.e2e.tests.test_subscription.TestE2ESubscriptionFlow" name="test_e2e_group_based_subscription_but_no_auth_gets_403" time="17.464" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_subscription_active_status_with_valid_model" time="9.417" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_subscription_failed_status_with_missing_model" time="8.763" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_authpolicy_active_status_with_valid_model" time="8.762" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_authpolicy_failed_status_with_missing_model" time="8.734" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_subscription_degraded_status_with_partial_models" time="9.077" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_subscription_degraded_trlp_blocks_inference" time="97.709" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_authpolicy_degraded_status_with_partial_models" time="8.763" /><testcase classname="test.e2e.tests.test_subscription.TestStatusReporting" name="test_subscription_status_transitions_on_model_deletion" time="22.235" /><testcase classname="test.e2e.tests.test_subscription.TestDegradedSubscriptionFiltering" name="test_degraded_healthy_model_allows_inference" time="19.194" /><testcase classname="test.e2e.tests.test_subscription.TestDegradedSubscriptionFiltering" name="test_failed_subscription_blocks_inference" time="19.428" /><testcase classname="test.e2e.tests.test_subscription.TestDegradedSubscriptionFiltering" name="test_models_endpoint_with_degraded_subscription_api_key" time="19.204" /><testcase classname="test.e2e.tests.test_subscription.TestDegradedSubscriptionFiltering" name="test_models_endpoint_with_degraded_subscription_kube_token" time="19.163" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_single_subscription_auto_select" time="42.119" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_explicit_subscription_header" time="16.725" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_empty_subscription_header_value" time="8.411" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_models_filtered_by_subscription" time="8.781" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_deduplication_same_model_multiple_refs" time="17.171" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_different_modelrefs_same_model_id" time="17.165" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_multiple_distinct_models_in_subscription" time="19.370" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_user_token_returns_all_models" time="16.466" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_user_token_with_subscription_header_filters" time="17.030" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_empty_model_list" time="10.947" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_response_schema_matches_openapi" time="8.398" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_model_metadata_preserved" time="8.395" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_api_key_scoped_to_subscription" time="17.156" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_api_key_with_deleted_subscription_403" time="25.207" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_api_key_with_inaccessible_subscription_403" time="22.348" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_invalid_subscription_header_403" time="17.028" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_access_denied_to_subscription_403" time="17.665" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_api_key_ignores_subscription_header" time="22.294" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_multiple_api_keys_different_subscriptions" time="22.219" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_service_account_token_multiple_subs_no_header" time="14.349" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_service_account_token_multiple_subs_with_header" time="16.461" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_unauthenticated_request_401" time="0.022" /><testcase classname="test.e2e.tests.test_models_endpoint.TestModelsEndpoint" name="test_central_models_endpoint_exempt_from_rate_limiting" time="25.221"><failure message="AssertionError: Expected to hit rate limit within 5 requests with 3 token limit, but got 0 successful requests without hitting limit&#10;assert False">self = &lt;test_models_endpoint.TestModelsEndpoint object at 0x7fee69cd5dc0&gt;

    def test_central_models_endpoint_exempt_from_rate_limiting(self):
        """
        Test that the central /v1/models endpoint remains accessible when token quota is exhausted.
    
        This test validates the end-to-end flow:
        1. User exhausts token quota with inference requests (gets 429)
        2. Central /v1/models endpoint is exempt at gateway level (gateway-default-deny TRLP)
        3. Central endpoint calls model-specific /v1/models endpoints for discovery
        4. Model-specific endpoints are also exempt (per-route TRLP fix)
        5. Central endpoint successfully aggregates and returns model list
    
        This ensures the entire discovery chain works when quota is exhausted.
    
        Ref: https://issues.redhat.com/browse/RHOAIENG-46770
        """
        # Use unconfigured model to isolate this test
        model_ref = UNCONFIGURED_MODEL_REF
        model_path = UNCONFIGURED_MODEL_PATH
    
        # Create unique subscription and auth policy names
        auth_policy_name = "e2e-central-models-exempt-auth"
        subscription_name = "e2e-central-models-exempt-sub"
    
        # Very low limit for fast, deterministic test
        # With 3 token limit and max_tokens=1, we're guaranteed to exhaust quota within 5 requests
        # (each successful request consumes ≥1 token, so 5 requests &gt; 3 token limit)
        token_limit = 3
        window = "1m"
        max_tokens = 1
    
        try:
            # 1. Create auth policy allowing system:authenticated
            log.info(f"Creating auth policy for {model_ref}")
            _create_test_auth_policy(
                name=auth_policy_name,
                model_refs=[model_ref],
                groups=["system:authenticated"]
            )
            _wait_reconcile()
            _wait_for_maas_auth_policy_phase(auth_policy_name, timeout=90)
    
            # 2. Create subscription with low token limit
            log.info(f"Creating subscription with {token_limit} token limit")
            _create_test_subscription(
                name=subscription_name,
                model_refs=[model_ref],
                groups=["system:authenticated"],
                token_limit=token_limit,
                window=window
            )
            _wait_reconcile()
            _wait_for_maas_subscription_phase(subscription_name, timeout=90)
    
            # Wait for TRLP to be created and enforced
            _wait_for_token_rate_limit_policy(model_ref, model_namespace=MODEL_NAMESPACE, timeout=90)
    
            # 3. Create API key for this subscription
            oc_token = _get_cluster_token()
            api_key = _create_api_key(
                oc_token,
                name=f"e2e-central-exempt-{uuid.uuid4().hex[:8]}",
                subscription=subscription_name,
            )
    
            # 4. Exhaust the token limit
            # With 3 token limit and 5 requests, we're guaranteed to hit the limit
            # (each successful request consumes ≥1 token, so 5 requests &gt; 3 token limit)
            max_requests = 5
            success_count = 0
            rate_limited = False
    
            log.info(f"Exhausting token quota: sending up to {max_requests} requests")
            for i in range(max_requests):
                r = _inference(api_key, path=model_path)
                request_num = i + 1
                log.info(f"Request {request_num}: {r.status_code}")
    
                if r.status_code == 200:
                    success_count += 1
                elif r.status_code == 429:
                    log.info(f"Rate limit hit after {success_count} successful requests")
                    rate_limited = True
                    break
    
            # Verify we hit rate limit (otherwise test setup is broken)
&gt;           assert rate_limited, \
                f"Expected to hit rate limit within {max_requests} requests with {token_limit} token limit, " \
                f"but got {success_count} successful requests without hitting limit"
E               AssertionError: Expected to hit rate limit within 5 requests with 3 token limit, but got 0 successful requests without hitting limit
E               assert False

test/e2e/tests/test_models_endpoint.py:2195: AssertionError</failure></testcase><testcase classname="test.e2e.tests.test_external_models.TestExternalModelDiscovery" name="test_maasmodelref_created" time="5.151" /><testcase classname="test.e2e.tests.test_external_models.TestExternalModelDiscovery" name="test_reconciler_created_httproute" time="0.107" /><testcase classname="test.e2e.tests.test_external_models.TestExternalModelDiscovery" name="test_reconciler_created_backend_service" time="0.123" /><testcase classname="test.e2e.tests.test_external_models.TestExternalModelAuth" name="test_invalid_key_returns_401" time="0.023" /><testcase classname="test.e2e.tests.test_external_models.TestExternalModelAuth" name="test_no_key_returns_401" time="0.018" /><testcase classname="test.e2e.tests.test_external_models.TestExternalModelEgress" name="test_request_forwarded_returns_200" time="0.031" /><testcase classname="test.e2e.tests.test_external_models.TestExternalModelCleanup" name="test_delete_removes_httproute" time="37.524" /><testcase classname="test.e2e.tests.test_tenant.TestTenantLifecycle" name="test_tenant_ready_and_phase_healthy" time="0.334" /><testcase classname="test.e2e.tests.test_tenant.TestTenantLifecycle" name="test_payload_processing_deployed_with_active_tenant" time="0.109"><skipped type="pytest.skip" message="Tenant not Active (e.g. Degraded); payload-processing not asserted">/workspace/source/test/e2e/tests/test_tenant.py:127: Tenant not Active (e.g. Degraded); payload-processing not asserted</skipped></testcase><testcase classname="test.e2e.tests.test_tenant.TestTenantContract" name="test_status_has_phase_and_conditions" time="0.099" /><testcase classname="test.e2e.tests.test_tenant.TestTenantContract" name="test_spec_is_well_formed" time="0.101" /><testcase classname="test.e2e.tests.test_tenant.TestTenantContract" name="test_conditions_use_kubernetes_metav1_shape" time="0.106" /><testcase classname="test.e2e.tests.test_tenant.TestTenantNoFalseOwnership" name="test_maas_user_crs_not_owned_by_tenant" time="0.341" /><testcase classname="test.e2e.tests.test_aitenant_lifecycle.TestAITenantLifecycle" name="test_aitenant_create_bootstrap_resources" time="13.071" /><testcase classname="test.e2e.tests.test_aitenant_lifecycle.TestAITenantLifecycle" name="test_aitenant_delete_cleans_up_bootstrap_resources" time="14.296" /><testcase classname="test.e2e.tests.test_config_tenant.TestConfigAnchorPresence" name="test_cluster_config_default_exists" time="0.399" /><testcase classname="test.e2e.tests.test_config_tenant.TestConfigAnchorPresence" name="test_cluster_config_not_terminating" time="0.128" /><testcase classname="test.e2e.tests.test_config_tenant.TestConfigTenantOwnership" name="test_tenant_lists_config_owner_reference" time="0.106" /><testcase classname="test.e2e.tests.test_config_tenant.TestConfigTenantOwnership" name="test_maas_controller_deployment_lists_config_owner_reference" time="0.104" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenFlow" name="test_oidc_token_can_create_api_key" time="0.119" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenFlow" name="test_invalid_oidc_token_gets_401" time="0.118" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenFlow" name="test_empty_bearer_token_gets_401" time="0.021" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenFlow" name="test_no_auth_header_gets_401" time="0.022" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenClaims" name="test_token_contains_groups_claim" time="0.064" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenClaims" name="test_token_contains_preferred_username" time="0.062" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCTokenClaims" name="test_different_users_have_different_groups" time="0.143" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCMultiUser" name="test_bob_sre_can_mint_api_key" time="0.090" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCMultiUser" name="test_wrong_password_gets_rejected" time="0.057" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCMultiUser" name="test_nonexistent_user_gets_rejected" time="0.057" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCModelAccess" name="test_minted_api_key_can_list_models_and_infer" time="0.180" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCModelAccess" name="test_revoked_api_key_cannot_access_models" time="3.141" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCModelAccess" name="test_oidc_user_without_group_access_gets_empty_list" time="0.000"><skipped type="pytest.skip" message="OIDC_USERNAME_NO_ACCESS and OIDC_PASSWORD_NO_ACCESS not configured">/workspace/source/test/e2e/tests/test_external_oidc.py:420: OIDC_USERNAME_NO_ACCESS and OIDC_PASSWORD_NO_ACCESS not configured</skipped></testcase><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCMultiTenant" name="test_tenant_b_token_rejected_by_maas" time="0.146" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCMultiTenant" name="test_tenant_a_users_are_isolated" time="0.183" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCAPIKeyLifecycle" name="test_create_and_revoke_api_key" time="0.129" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCHeaderInjection" name="test_injected_username_header_ignored" time="0.124" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCHeaderInjection" name="test_injected_group_header_does_not_escalate" time="0.137" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCHeaderInjection" name="test_injected_subscription_header_ignored" time="0.145" /><testcase classname="test.e2e.tests.test_external_oidc.TestOIDCHeaderInjection" name="test_injected_username_on_oidc_token_ignored" time="0.082" /></testsuite></testsuites>