Source code for ibm_watson_machine_learning.foundation_models.inference.model_inference

#  -----------------------------------------------------------------------------------------
#  (C) Copyright IBM Corp. 2023-2024.
#  https://opensource.org/licenses/BSD-3-Clause
#  -----------------------------------------------------------------------------------------

from __future__ import print_function

from ibm_watson_machine_learning import APIClient
from ibm_watson_machine_learning.wml_client_error import WMLClientError, ParamOutOfRange, InvalidMultipleArguments
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from .base_model_inference import BaseModelInference
from .fm_model_inference import FMModelInference
from .deployment_model_inference import DeploymentModelInference

_DEFAULT_LIST_LENGTH = 50


class ModelInference(BaseModelInference):
    """Instantiate the model interface.

    .. hint::
        To use the ModelInference class with LangChain, use the :func:`WatsonxLLM <ibm_watson_machine_learning.foundation_models.extensions.langchain.WatsonxLLM>` wrapper.

    :param model_id: the type of model to use
    :type model_id: str, optional

    :param deployment_id: ID of tuned model's deployment
    :type deployment_id: str, optional

    :param credentials: credentials to Watson Machine Learning instance
    :type credentials: dict, optional

    :param params: parameters to use during generate requests
    :type params: dict, optional

    :param project_id: ID of the Watson Studio project
    :type project_id: str, optional

    :param space_id: ID of the Watson Studio space
    :type space_id: str, optional

    :param verify: user can pass as verify one of the following:

        - the path to a CA_BUNDLE file
        - the path of a directory with certificates of trusted CAs
        - `True` - the default path to the truststore will be taken
        - `False` - no verification will be made
    :type verify: bool or str, optional

    :param api_client: initialized APIClient object with a set project or space ID. If passed, ``credentials`` and ``project_id``/``space_id`` are not required.
    :type api_client: APIClient, optional

    .. note::
        One of these parameters is required: [``model_id``, ``deployment_id``]

    .. note::
        One of these parameters is required: [``project_id``, ``space_id``] when the ``credentials`` parameter is passed.

    .. hint::
        You can copy the project_id from the project's Manage tab (Project -> Manage -> General -> Details).

    **Example**

    .. code-block:: python

        from ibm_watson_machine_learning.foundation_models import ModelInference
        from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
        from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

        # To display example params enter
        GenParams().get_example_values()

        generate_params = {
            GenParams.MAX_NEW_TOKENS: 25
        }

        model_inference = ModelInference(
            model_id=ModelTypes.FLAN_UL2,
            params=generate_params,
            credentials={
                "apikey": "***",
                "url": "https://us-south.ml.cloud.ibm.com"
            },
            project_id="*****"
        )

    .. code-block:: python

        from ibm_watson_machine_learning.foundation_models import ModelInference

        deployment_inference = ModelInference(
            deployment_id="<ID of deployed model>",
            credentials={
                "apikey": "***",
                "url": "https://us-south.ml.cloud.ibm.com"
            },
            project_id="*****"
        )
    """

    def __init__(self,
                 *,
                 model_id: str = None,
                 deployment_id: str = None,
                 params: dict = None,
                 credentials: dict = None,
                 project_id: str = None,
                 space_id: str = None,
                 verify=None,
                 api_client: APIClient = None) -> None:
        self.model_id = model_id
        if isinstance(self.model_id, ModelTypes):
            self.model_id = self.model_id.value

        self.deployment_id = deployment_id

        if self.model_id and self.deployment_id:
            raise InvalidMultipleArguments(params_names_list=["model_id", "deployment_id"],
                                           reason="Both arguments were provided.")
        elif not self.model_id and not self.deployment_id:
            raise InvalidMultipleArguments(params_names_list=["model_id", "deployment_id"],
                                           reason="None of the arguments were provided.")

        self.params = params
        ModelInference._validate_type(params, 'params', dict, False)

        if credentials:
            self._client = APIClient(credentials, verify=verify)
        elif api_client:
            self._client = api_client
        else:
            raise InvalidMultipleArguments(params_names_list=["credentials", "api_client"],
                                           reason="None of the arguments were provided.")

        if space_id:
            self._client.set.default_space(space_id)
        elif project_id:
            self._client.set.default_project(project_id)
        elif not api_client:
            raise InvalidMultipleArguments(params_names_list=["space_id", "project_id"],
                                           reason="None of the arguments were provided.")

        if not self._client.CLOUD_PLATFORM_SPACES and self._client.CPD_version < 4.8:
            raise WMLClientError(error_msg="Operation is unsupported for this release.")

        if self.model_id:
            self._inference = FMModelInference(model_id=self.model_id,
                                               params=self.params,
                                               api_client=self._client)
        else:
            self._inference = DeploymentModelInference(deployment_id=self.deployment_id,
                                                       params=self.params,
                                                       api_client=self._client)

        BaseModelInference.__init__(self, __name__, self._client)
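    # A minimal construction sketch (kept as a comment; uncomment to run) for the
    # ``api_client`` path described in the docstring above, as an alternative to
    # passing raw ``credentials``. The credential values and model id below are
    # placeholders; any initialized APIClient with a default project or space set
    # would work the same way.
    #
    #     from ibm_watson_machine_learning import APIClient
    #     from ibm_watson_machine_learning.foundation_models import ModelInference
    #
    #     client = APIClient({"apikey": "***", "url": "https://us-south.ml.cloud.ibm.com"})
    #     client.set.default_project("*****")
    #
    #     # credentials and project_id/space_id are not needed here
    #     model_inference = ModelInference(model_id="google/flan-ul2", api_client=client)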
    def get_details(self):
        """Get model interface's details

        :return: details of model or deployment
        :rtype: dict

        **Example**

        .. code-block:: python

            model_inference.get_details()
        """
        return self._inference.get_details()
    def generate(self,
                 prompt=None,
                 params=None,
                 guardrails=False,
                 guardrails_hap_params=None,
                 guardrails_pii_params=None,
                 concurrency_limit=10,
                 async_mode=False):
        """Given a text prompt as input and parameters, the selected model (model_id) or deployment (deployment_id) generates a completion text as generated_text. For a prompt template deployment, `prompt` should be None.

        :param params: meta props for text generation, use ``ibm_watson_machine_learning.metanames.GenTextParamsMetaNames().show()`` to view the list of MetaNames
        :type params: dict

        :param concurrency_limit: number of requests that will be sent in parallel, max is 10
        :type concurrency_limit: int

        :param prompt: the prompt string or list of strings. If a list of strings is passed, requests will be managed in parallel with the rate of concurrency_limit, defaults to None
        :type prompt: (str | list | None), optional

        :param guardrails: If True, then the potentially hateful, abusive, and/or profane language (HAP) detection filter is toggled on for both the prompt and the generated text, defaults to False
        :type guardrails: bool

        :param guardrails_hap_params: meta props for HAP moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_hap_params: dict

        :param guardrails_pii_params: meta props for PII moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_pii_params: dict

        :param async_mode: If True, then results are yielded asynchronously (using a generator). In this case both the prompt and the generated text will be concatenated in the final response - under `generated_text`, defaults to False
        :type async_mode: bool

        :return: scoring result containing generated content
        :rtype: dict

        **Example**

        .. code-block:: python

            q = "What is 1 + 1?"
            generated_response = model_inference.generate(prompt=q)
            print(generated_response['results'][0]['generated_text'])
        """
        self._validate_type(params, 'params', dict, False)
        self._validate_type(concurrency_limit, 'concurrency_limit', [int, float], False, raise_error_for_list=True)
        if isinstance(concurrency_limit, float):  # convert float (e.g. 10.0) to int
            concurrency_limit = int(concurrency_limit)

        if concurrency_limit > 10 or concurrency_limit < 1:
            raise ParamOutOfRange(param_name='concurrency_limit', value=concurrency_limit, min=1, max=10)

        return self._inference.generate(prompt=prompt,
                                        params=params,
                                        guardrails=guardrails,
                                        guardrails_hap_params=guardrails_hap_params,
                                        guardrails_pii_params=guardrails_pii_params,
                                        concurrency_limit=concurrency_limit,
                                        async_mode=async_mode)
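    # A usage sketch (kept as a comment; uncomment to run) for the list-of-prompts
    # path documented above: when ``prompt`` is a list, requests are sent in
    # parallel at the rate of ``concurrency_limit`` and the result is a list with
    # one response per prompt. ``model_inference`` is assumed to be an instance
    # constructed as in the class example.
    #
    #     prompts = ["What is 1 + 1?", "What is 2 + 2?", "What is 3 + 3?"]
    #     responses = model_inference.generate(prompt=prompts, concurrency_limit=5)
    #
    #     for response in responses:
    #         print(response['results'][0]['generated_text'])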
    def generate_text(self,
                      prompt=None,
                      params=None,
                      guardrails=False,
                      guardrails_hap_params=None,
                      guardrails_pii_params=None,
                      raw_response=False,
                      concurrency_limit=10):
        """Given a text prompt as input and parameters, the selected model (model_id) generates a completion text as generated_text. For a prompt template deployment, `prompt` should be None.

        :param params: meta props for text generation, use ``ibm_watson_machine_learning.metanames.GenTextParamsMetaNames().show()`` to view the list of MetaNames
        :type params: dict

        :param concurrency_limit: number of requests that will be sent in parallel, max is 10
        :type concurrency_limit: int

        :param prompt: the prompt string or list of strings. If a list of strings is passed, requests will be managed in parallel with the rate of concurrency_limit, defaults to None
        :type prompt: (str | list | None), optional

        :param guardrails: If True, then the potentially hateful, abusive, and/or profane language (HAP) detection filter is toggled on for both the prompt and the generated text, defaults to False. If HAP is detected, a `HAPDetectionWarning` is issued
        :type guardrails: bool

        :param guardrails_hap_params: meta props for HAP moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_hap_params: dict

        :param guardrails_pii_params: meta props for PII moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_pii_params: dict

        :param raw_response: return the whole response object
        :type raw_response: bool, optional

        :return: generated content
        :rtype: str

        .. note::
            By default only the first occurrence of `HAPDetectionWarning` is displayed. To enable printing all warnings of this category, use:

            .. code-block:: python

                import warnings
                from ibm_watson_machine_learning.foundation_models.utils import HAPDetectionWarning

                warnings.filterwarnings("always", category=HAPDetectionWarning)

        **Example**

        .. code-block:: python

            q = "What is 1 + 1?"
            generated_text = model_inference.generate_text(prompt=q)
            print(generated_text)
        """
        metadata = self.generate(prompt=prompt,
                                 params=params,
                                 guardrails=guardrails,
                                 guardrails_hap_params=guardrails_hap_params,
                                 guardrails_pii_params=guardrails_pii_params,
                                 concurrency_limit=concurrency_limit)
        if raw_response:
            return metadata
        else:
            if isinstance(prompt, list):
                return [self._return_guardrails_stats(single_response)['generated_text']
                        for single_response in metadata]
            else:
                return self._return_guardrails_stats(metadata)['generated_text']
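    # A sketch (kept as a comment; uncomment to run) of the guardrails path: with
    # ``guardrails=True`` the HAP filter is applied to both the prompt and the
    # generated text, and a ``HAPDetectionWarning`` is issued on detection. The
    # meta prop used below is illustrative; use
    # ``GenTextModerationsMetaNames().show()`` to view the actual list of MetaNames.
    #
    #     from ibm_watson_machine_learning.metanames import GenTextModerationsMetaNames
    #
    #     hap_params = {GenTextModerationsMetaNames.THRESHOLD: 0.75}
    #
    #     text = model_inference.generate_text(prompt="What is 1 + 1?",
    #                                          guardrails=True,
    #                                          guardrails_hap_params=hap_params)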
    def generate_text_stream(self,
                             prompt=None,
                             params=None,
                             raw_response=False,
                             guardrails=False,
                             guardrails_hap_params=None,
                             guardrails_pii_params=None):
        """Given a text prompt as input and parameters, the selected model (model_id) generates streamed text as generate_text_stream. For a prompt template deployment, `prompt` should be None.

        :param params: meta props for text generation, use ``ibm_watson_machine_learning.metanames.GenTextParamsMetaNames().show()`` to view the list of MetaNames
        :type params: dict

        :param prompt: the prompt string, defaults to None
        :type prompt: str, optional

        :param raw_response: yields the whole response object
        :type raw_response: bool, optional

        :param guardrails: If True, then the potentially hateful, abusive, and/or profane language (HAP) detection filter is toggled on for both the prompt and the generated text, defaults to False. If HAP is detected, a `HAPDetectionWarning` is issued
        :type guardrails: bool

        :param guardrails_hap_params: meta props for HAP moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_hap_params: dict

        :return: scoring result containing generated content
        :rtype: generator

        .. note::
            By default only the first occurrence of `HAPDetectionWarning` is displayed. To enable printing all warnings of this category, use:

            .. code-block:: python

                import warnings
                from ibm_watson_machine_learning.foundation_models.utils import HAPDetectionWarning

                warnings.filterwarnings("always", category=HAPDetectionWarning)

        **Example**

        .. code-block:: python

            q = "Write an epigram about the sun"
            generated_response = model_inference.generate_text_stream(prompt=q)

            for chunk in generated_response:
                print(chunk, end='')
        """
        self._validate_type(params, 'params', dict, False)

        return self._inference.generate_text_stream(prompt=prompt,
                                                    params=params,
                                                    raw_response=raw_response,
                                                    guardrails=guardrails,
                                                    guardrails_hap_params=guardrails_hap_params,
                                                    guardrails_pii_params=guardrails_pii_params)
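    # A sketch (kept as a comment; uncomment to run) of the ``raw_response=True``
    # variant: each streamed chunk is then a response object rather than a bare
    # string, so the text has to be pulled out explicitly. The field access below
    # assumes each chunk mirrors the ``generate`` response layout shown earlier in
    # this module.
    #
    #     stream = model_inference.generate_text_stream(prompt="Write an epigram about the sun",
    #                                                   raw_response=True)
    #
    #     for chunk in stream:
    #         print(chunk['results'][0]['generated_text'], end='')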
    def tokenize(self, prompt=None, return_tokens: bool = False):
        """
        The text tokenize operation allows you to check the conversion of provided input to tokens for a given model. It splits text into words or sub-words, which are then converted to IDs through a look-up table (vocabulary). Tokenization allows the model to have a reasonable vocabulary size.

        .. note::
            The method is not supported for deployments; it is available only for base models.

        :param prompt: the prompt string, defaults to None
        :type prompt: str, optional

        :param return_tokens: the parameter for text tokenization, defaults to False
        :type return_tokens: bool

        :return: the result of tokenizing the input string
        :rtype: dict

        **Example**

        .. code-block:: python

            q = "Write an epigram about the moon"
            tokenized_response = model_inference.tokenize(prompt=q, return_tokens=True)
            print(tokenized_response["result"])
        """
        return self._inference.tokenize(prompt=prompt, return_tokens=return_tokens)
    def to_langchain(self):
        """
        :return: WatsonxLLM wrapper for watsonx foundation models
        :rtype: WatsonxLLM

        **Example**

        .. code-block:: python

            from langchain import PromptTemplate
            from langchain.chains import LLMChain
            from ibm_watson_machine_learning.foundation_models import ModelInference
            from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes

            flan_ul2_model = ModelInference(
                model_id=ModelTypes.FLAN_UL2,
                credentials={
                    "apikey": "***",
                    "url": "https://us-south.ml.cloud.ibm.com"
                },
                project_id="*****"
            )

            prompt_template = "What color is the {flower}?"

            llm_chain = LLMChain(llm=flan_ul2_model.to_langchain(), prompt=PromptTemplate.from_template(prompt_template))
            llm_chain('sunflower')

        .. code-block:: python

            from langchain import PromptTemplate
            from langchain.chains import LLMChain
            from ibm_watson_machine_learning.foundation_models import ModelInference
            from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes

            deployed_model = ModelInference(
                deployment_id="<ID of deployed model>",
                credentials={
                    "apikey": "***",
                    "url": "https://us-south.ml.cloud.ibm.com"
                },
                space_id="*****"
            )

            prompt_template = "What color is the {car}?"

            llm_chain = LLMChain(llm=deployed_model.to_langchain(), prompt=PromptTemplate.from_template(prompt_template))
            llm_chain('fiat')
        """
        from ibm_watson_machine_learning.foundation_models.extensions.langchain.llm import WatsonxLLM

        return WatsonxLLM(self)
    def get_identifying_params(self) -> dict:
        """Represent Model Inference's setup in a dictionary"""
        return self._inference.get_identifying_params()
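# A brief usage sketch for the method above (kept as a comment; uncomment to run),
# in the spirit of the docstring examples elsewhere in this module;
# ``model_inference`` is assumed to be an already constructed instance:
#
#     identifying_params = model_inference.get_identifying_params()
#     print(identifying_params)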