# -----------------------------------------------------------------------------------------
# (C) Copyright IBM Corp. 2025.
# https://opensource.org/licenses/BSD-3-Clause
# -----------------------------------------------------------------------------------------
from typing import TypedDict
import pandas as pd
from ibm_watsonx_ai import APIClient
from ibm_watsonx_ai.wml_resource import WMLResource
[docs]
class RateLimitSettings(TypedDict):
"""
Model Gateway rate limit settings.
:param amount: amount is the number of tokens refilled into the bucket each interval
:type amount: int
:param capacity: capacity is the maximum number of tokens (requests) the bucket can hold
:type capacity: int
:param duration: duration is the refill interval, formatted as a Go duration string
(for more information please see: https://pkg.go.dev/time#ParseDuration)
:type duration: str
"""
amount: int
capacity: int
duration: str
[docs]
class RateLimits(WMLResource):
"""Model Gateway rate limits class."""
def __init__(self, api_client: APIClient):
WMLResource.__init__(self, __name__, api_client)
def _create(
self,
*,
payload: dict,
request: RateLimitSettings | None,
token: RateLimitSettings | None,
) -> dict:
if request:
payload["request"] = request
if token:
payload["token"] = token
response = self._client.httpx_client.post(
self._client._href_definitions.get_gateway_rate_limits_href(),
headers=self._client._get_headers(),
json=payload,
)
return self._handle_response(201, "rate limit creation", response)
[docs]
def create_for_tenant(
self,
*,
request: RateLimitSettings | None = None,
token: RateLimitSettings | None = None,
) -> dict:
"""Create rate limit for tenant in Model Gateway.
:param request: request rate limiting settings
:type request: RateLimitSettings, optional
:param token: token rate limiting settings
:type token: RateLimitSettings, optional
:returns: rate limit details
:rtype: dict
"""
return self._create(
payload={"type": "tenant"},
request=request,
token=token,
)
[docs]
def create_for_provider(
self,
provider_id: str,
*,
request: RateLimitSettings | None = None,
token: RateLimitSettings | None = None,
) -> dict:
"""Create rate limit for provider in Model Gateway.
:param provider_id: ID of the Model Gateway provider
:type provider_id: str
:param request: request rate limiting settings
:type request: RateLimitSettings, optional
:param token: token rate limiting settings
:type token: RateLimitSettings, optional
:returns: rate limit details
:rtype: dict
"""
return self._create(
payload={"type": "provider", "provider_uuid": provider_id},
request=request,
token=token,
)
[docs]
def create_for_model(
self,
model_id: str,
*,
request: RateLimitSettings | None = None,
token: RateLimitSettings | None = None,
) -> dict:
"""Create rate limit for model in Model Gateway.
:param model_id: ID of the Model Gateway model
:type model_id: str
:param request: request rate limiting settings
:type request: RateLimitSettings, optional
:param token: token rate limiting settings
:type token: RateLimitSettings, optional
:returns: rate limit details
:rtype: dict
"""
return self._create(
payload={"type": "model", "model_uuid": model_id},
request=request,
token=token,
)
def _update(
self,
*,
rate_limit_id: str,
payload: dict,
request: RateLimitSettings | None,
token: RateLimitSettings | None,
) -> dict:
if request:
payload["request"] = request
if token:
payload["token"] = token
response = self._client.httpx_client.put(
self._client._href_definitions.get_gateway_rate_limit_href(rate_limit_id),
headers=self._client._get_headers(),
json=payload,
)
return self._handle_response(200, "rate limit update", response)
[docs]
def update_for_tenant(
self,
rate_limit_id: str,
*,
request: RateLimitSettings | None = None,
token: RateLimitSettings | None = None,
) -> dict:
"""Update rate limit for tenant in Model Gateway.
:param rate_limit_id: ID of the rate limit
:type rate_limit_id: str
:param request: request rate limiting settings
:type request: RateLimitSettings, optional
:param token: token rate limiting settings
:type token: RateLimitSettings, optional
:returns: rate limit details
:rtype: dict
"""
return self._update(
rate_limit_id=rate_limit_id,
payload={"type": "tenant"},
request=request,
token=token,
)
[docs]
def update_for_provider(
self,
rate_limit_id: str,
provider_id: str,
*,
request: RateLimitSettings | None = None,
token: RateLimitSettings | None = None,
) -> dict:
"""Update rate limit for provider in Model Gateway.
:param rate_limit_id: ID of the rate limit
:type rate_limit_id: str
:param provider_id: ID of the Model Gateway provider
:type provider_id: str
:param request: request rate limiting settings
:type request: RateLimitSettings, optional
:param token: token rate limiting settings
:type token: RateLimitSettings, optional
:returns: rate limit details
:rtype: dict
"""
return self._update(
rate_limit_id=rate_limit_id,
payload={"type": "provider", "provider_uuid": provider_id},
request=request,
token=token,
)
[docs]
def update_for_model(
self,
rate_limit_id: str,
model_id: str,
*,
request: RateLimitSettings | None = None,
token: RateLimitSettings | None = None,
) -> dict:
"""Update rate limit for model in Model Gateway.
:param rate_limit_id: ID of the rate limit
:type rate_limit_id: str
:param model_id: ID of the Model Gateway model
:type model_id: str
:param request: request rate limiting settings
:type request: RateLimitSettings, optional
:param token: token rate limiting settings
:type token: RateLimitSettings, optional
:returns: rate limit details
:rtype: dict
"""
return self._update(
rate_limit_id=rate_limit_id,
payload={"type": "model", "model_uuid": model_id},
request=request,
token=token,
)
[docs]
def get_details(self, *, rate_limit_id: str | None = None) -> dict:
"""Get details of rate limits.
If ``rate_limit_id`` is specified, returns details of that rate limit.
:param rate_limit_id: ID of the rate limit
:type rate_limit_id: str, optional
:returns: details of rate limits or rate limit if ``rate_limit_id`` is specified
:rtype: dict
"""
url = (
self._client._href_definitions.get_gateway_rate_limit_href(rate_limit_id)
if rate_limit_id
else self._client._href_definitions.get_gateway_rate_limits_href()
)
response = self._client.httpx_client.get(
url=url, headers=self._client._get_headers()
)
return self._handle_response(200, "getting rate limit details", response)
[docs]
def list(self) -> pd.DataFrame:
"""List rate limits registered in Model Gateway.
:returns: dataframe containing list results
:rtype: pandas.DataFrame
"""
rate_limit_details = self.get_details()["data"]
rate_limit_values = [
(item["uuid"], item["type"], "request" in item, "token" in item)
for item in rate_limit_details
]
table = self._list(
rate_limit_values, ["ID", "TYPE", "FOR_REQUESTS", "FOR_TOKENS"], limit=None
)
return table
[docs]
def delete(self, rate_limit_id: str) -> str:
"""Delete rate limit from Model Gateway.
:param rate_limit_id: ID of the rate limit
:type rate_limit_id: str
:return: status "SUCCESS" if deletion is successful
:rtype: Literal["SUCCESS"]
:raises: ApiRequestFailure if deletion failed
"""
response = self._client.httpx_client.delete(
self._client._href_definitions.get_gateway_rate_limit_href(rate_limit_id),
headers=self._client._get_headers(),
)
return self._handle_response(
204, "model deletion", response, json_response=False
)
[docs]
@staticmethod
def get_id(rate_limit_details: dict) -> str:
"""Get rate limit ID from rate limit details.
:param rate_limit_details: details of the rate limit
:type rate_limit_details: dict
:returns: ID of the rate limit
:rtype: str
"""
return rate_limit_details["uuid"]