# -----------------------------------------------------------------------------------------
# (C) Copyright IBM Corp. 2025.
# https://opensource.org/licenses/BSD-3-Clause
# -----------------------------------------------------------------------------------------
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from ibm_watsonx_ai.wml_client_error import InvalidMultipleArguments
from ibm_watsonx_ai.wml_resource import WMLResource
if TYPE_CHECKING:
from ibm_watsonx_ai import APIClient, Credentials
[docs]
class AudioModelInference(WMLResource):
"""
Instantiate the audio model interface
:param model: type of model to use
:type model: str, optional
:param credentials: credentials for the Watson Machine Learning instance
:type credentials: Credentials or dict, optional
:param project_id: ID of the Watson Studio project
:type project_id: str, optional
:param space_id: ID of the Watson Studio space
:type space_id: str, optional
:param verify: You can pass one of the following as verify:
* the path to a CA_BUNDLE file
* the path of directory with certificates of trusted CAs
* `True` - default path to truststore will be taken
* `False` - no verification will be made
:type verify: bool | str | Path, optional
:param api_client: initialized APIClient object with a set project ID or space ID. If passed, ``credentials`` and ``project_id``/``space_id`` are not required.
:type api_client: APIClient, optional
**Example:**
.. code-block:: python
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import AudioModelInference
audio_model = AudioModelInference(
model="<AUDIO MODEL>",
credentials=Credentials(
api_key=IAM_API_KEY, url="https://us-south.ml.cloud.ibm.com"
),
project_id=project_id,
)
"""
def __init__(
self,
model: str,
credentials: Credentials | None = None,
project_id: str | None = None,
space_id: str | None = None,
verify: bool | str | Path | None = None,
api_client: APIClient | None = None,
) -> None:
self.model = model
if isinstance(verify, Path):
verify = str(verify)
if credentials:
from ibm_watsonx_ai import APIClient
self._client = APIClient(credentials, verify=verify)
elif api_client:
self._client = api_client
else:
raise InvalidMultipleArguments(
params_names_list=["credentials", "api_client"],
reason="None of the arguments were provided.",
)
if space_id:
self._client.set.default_space(space_id)
elif project_id:
self._client.set.default_project(project_id)
elif not api_client:
raise InvalidMultipleArguments(
params_names_list=["space_id", "project_id"],
reason="None of the arguments were provided.",
)
WMLResource.__init__(self, __name__, self._client)
[docs]
def transcribe(
self,
file_path: str | Path,
language: str | None = None,
) -> dict:
"""
Transcribe audio into text.
:param file_path: The path to the audio file to transcribe
:type file_path: str, Path, required
:param language: Target language to which to transcribe, e.g. 'fr' for French. Default is English.
:type language: str, optional
**Example:**
.. code-block:: python
file_path = "sample_audio.mp3"
response = audio_model.transcribe(file_path=file_path)
"""
self._client._check_if_either_is_set()
if isinstance(file_path, str):
file_path = Path(file_path)
headers = self._client._get_headers()
headers.pop("Content-Type", None)
if self._client.default_space_id:
headers["Authorization"] += f";space_id={self._client.default_space_id}"
elif self._client.default_project_id:
headers["Authorization"] += f";project_id={self._client.default_project_id}"
payload: dict = {"model": self.model}
if language is not None:
payload["language"] = language
with file_path.open("rb") as file:
files = {"file": (file_path.name, file, "multipart/form-data")}
response = self._client.httpx_client.post(
url=self._client._href_definitions.get_audio_transcriptions_href(),
data=payload,
files=files,
headers=headers,
)
return self._handle_response(200, "transcribe", response)