Source code for ibm_watsonx_ai.foundation_models.inference.audio_model_inference
# -----------------------------------------------------------------------------------------# (C) Copyright IBM Corp. 2025.# https://opensource.org/licenses/BSD-3-Clause# -----------------------------------------------------------------------------------------from__future__importannotationsfrompathlibimportPathfromtypingimportTYPE_CHECKINGfromibm_watsonx_ai.wml_client_errorimportInvalidMultipleArgumentsfromibm_watsonx_ai.wml_resourceimportWMLResourceifTYPE_CHECKING:fromibm_watsonx_aiimportAPIClient,Credentials
class AudioModelInference(WMLResource):
    """
    Instantiate the audio model interface.

    :param model: type of model to use
    :type model: str, required

    :param credentials: credentials for the Watson Machine Learning instance
    :type credentials: Credentials or dict, optional

    :param project_id: ID of the Watson Studio project
    :type project_id: str, optional

    :param space_id: ID of the Watson Studio space
    :type space_id: str, optional

    :param verify: You can pass one of the following as verify:

        * the path to a CA_BUNDLE file
        * the path of directory with certificates of trusted CAs
        * `True` - default path to truststore will be taken
        * `False` - no verification will be made
    :type verify: bool or str, optional

    :param api_client: initialized APIClient object with a set project ID or space ID.
        If passed, ``credentials`` and ``project_id``/``space_id`` are not required.
        Ignored when ``credentials`` is also passed.
    :type api_client: APIClient, optional

    :raises InvalidMultipleArguments: if neither ``credentials`` nor ``api_client``
        is provided, or if the client is created from ``credentials`` and neither
        ``space_id`` nor ``project_id`` is provided

    **Example:**

    .. code-block:: python

        from ibm_watsonx_ai import Credentials
        from ibm_watsonx_ai.foundation_models import AudioModelInference

        audio_model = AudioModelInference(
            model="<AUDIO MODEL>",
            credentials=Credentials(
                api_key = IAM_API_KEY,
                url = "https://us-south.ml.cloud.ibm.com"
            ),
            project_id=project_id
        )

    """

    def __init__(
        self,
        model: str,
        credentials: Credentials | None = None,
        project_id: str | None = None,
        space_id: str | None = None,
        verify: bool | str | None = None,
        api_client: APIClient | None = None,
    ) -> None:
        self.model = model

        # ``credentials`` takes precedence over ``api_client``; remember which
        # source the client came from, because only a freshly created client
        # is known to have no default space/project yet.
        client_from_credentials = bool(credentials)

        if client_from_credentials:
            # Imported locally; at module level the name is only available
            # under TYPE_CHECKING.
            from ibm_watsonx_ai import APIClient

            self._client = APIClient(credentials, verify=verify)
        elif api_client:
            self._client = api_client
        else:
            raise InvalidMultipleArguments(
                params_names_list=["credentials", "api_client"],
                reason="None of the arguments were provided.",
            )

        if space_id:
            self._client.set.default_space(space_id)
        elif project_id:
            self._client.set.default_project(project_id)
        elif client_from_credentials:
            # Testing ``not api_client`` here would skip this validation when
            # both ``credentials`` and ``api_client`` are passed, even though
            # the passed ``api_client`` is not the client in use.
            raise InvalidMultipleArguments(
                params_names_list=["space_id", "project_id"],
                reason="None of the arguments were provided.",
            )

        WMLResource.__init__(self, __name__, self._client)

    def transcribe(
        self,
        file_path: str | Path,
        language: str | None = None,
    ) -> dict:
        """
        Transcribe audio into text.

        :param file_path: The path to the audio file to transcribe
        :type file_path: str, Path, required

        :param language: Target language to which to transcribe, e.g. 'fr' for French.
            Default is English.
        :type language: str, optional

        :return: result of handling the transcription response
            (expected HTTP status code is 200)
        :rtype: dict

        **Example:**

        .. code-block:: python

            file_path = "sample_audio.mp3"

            response = audio_model.transcribe(file_path=file_path)

        """
        # Fail early when the client has neither a default space nor project.
        self._client._check_if_either_is_set()

        if isinstance(file_path, str):
            file_path = Path(file_path)

        headers = self._client._get_headers()
        # NOTE(review): presumably dropped so that the HTTP client can set its
        # own multipart Content-Type (with boundary) for the file upload.
        headers.pop("Content-Type", None)

        # The scope is appended to the Authorization header value; the space
        # takes precedence over the project when both are set.
        if self._client.default_space_id:
            headers["Authorization"] += f";space_id={self._client.default_space_id}"
        elif self._client.default_project_id:
            headers["Authorization"] += (
                f";project_id={self._client.default_project_id}"
            )

        payload: dict = {"model": self.model}
        # ``language`` is only sent when given explicitly.
        if language is not None:
            payload["language"] = language

        # The file handle must stay open while the request is being sent.
        with file_path.open("rb") as file:
            files = {"file": (file_path.name, file, "multipart/form-data")}

            response = self._client.httpx_client.post(
                url=self._client._href_definitions.get_audio_transcriptions_href(),
                data=payload,
                files=files,
                headers=headers,
            )

        return self._handle_response(200, "transcribe", response)