Source code for ibm_watson_machine_learning.foundation_models.inference.model_inference

#  -----------------------------------------------------------------------------------------
#  (C) Copyright IBM Corp. 2023-2024.
#  https://opensource.org/licenses/BSD-3-Clause
#  -----------------------------------------------------------------------------------------

from __future__ import print_function

from ibm_watson_machine_learning import APIClient
from ibm_watson_machine_learning.wml_client_error import WMLClientError, ParamOutOfRange, InvalidMultipleArguments
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from .base_model_inference import BaseModelInference
from .fm_model_inference import FMModelInference
from .deployment_model_inference import DeploymentModelInference

_DEFAULT_LIST_LENGTH = 50


class ModelInference(BaseModelInference):
    """Instantiate the model interface.

    .. hint::
        To use the ModelInference class with LangChain, use the :func:`WatsonxLLM <ibm_watson_machine_learning.foundation_models.extensions.langchain.WatsonxLLM>` wrapper.

    :param model_id: the type of model to use
    :type model_id: str, optional

    :param deployment_id: ID of tuned model's deployment
    :type deployment_id: str, optional

    :param credentials: credentials to Watson Machine Learning instance
    :type credentials: dict, optional

    :param params: parameters to use during generate requests
    :type params: dict, optional

    :param project_id: ID of the Watson Studio project
    :type project_id: str, optional

    :param space_id: ID of the Watson Studio space
    :type space_id: str, optional

    :param verify: user can pass as verify one of the following:

        - the path to a CA_BUNDLE file
        - the path of a directory with certificates of trusted CAs
        - `True` - the default path to the truststore will be taken
        - `False` - no verification will be made
    :type verify: bool or str, optional

    :param api_client: initialized APIClient object with a set project or space ID. If passed, ``credentials`` and ``project_id``/``space_id`` are not required.
    :type api_client: APIClient, optional

    .. note::
        One of these parameters is required: [``model_id``, ``deployment_id``]

    .. note::
        One of these parameters is required: [``project_id``, ``space_id``] when the ``credentials`` parameter is passed.

    .. hint::
        You can copy the project_id from the project's Manage tab (Project -> Manage -> General -> Details).

    **Example**

    .. code-block:: python

        from ibm_watson_machine_learning.foundation_models import ModelInference
        from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
        from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

        # To display example params enter
        GenParams().get_example_values()

        generate_params = {
            GenParams.MAX_NEW_TOKENS: 25
        }

        model_inference = ModelInference(
            model_id=ModelTypes.FLAN_UL2,
            params=generate_params,
            credentials={
                "apikey": "***",
                "url": "https://us-south.ml.cloud.ibm.com"
            },
            project_id="*****"
        )

    .. code-block:: python

        from ibm_watson_machine_learning.foundation_models import ModelInference

        deployment_inference = ModelInference(
            deployment_id="<ID of deployed model>",
            credentials={
                "apikey": "***",
                "url": "https://us-south.ml.cloud.ibm.com"
            },
            project_id="*****"
        )
    """

    def __init__(self,
                 *,
                 model_id: str = None,
                 deployment_id: str = None,
                 params: dict = None,
                 credentials: dict = None,
                 project_id: str = None,
                 space_id: str = None,
                 verify=None,
                 api_client: APIClient = None) -> None:
        self.model_id = model_id
        if isinstance(self.model_id, ModelTypes):
            self.model_id = self.model_id.value

        self.deployment_id = deployment_id

        if self.model_id and self.deployment_id:
            raise InvalidMultipleArguments(params_names_list=["model_id", "deployment_id"],
                                           reason="Both arguments were provided.")
        elif not self.model_id and not self.deployment_id:
            raise InvalidMultipleArguments(params_names_list=["model_id", "deployment_id"],
                                           reason="None of the arguments were provided.")

        self.params = params
        ModelInference._validate_type(params, 'params', dict, False)

        if credentials:
            self._client = APIClient(credentials, verify=verify)
        elif api_client:
            self._client = api_client
        else:
            raise InvalidMultipleArguments(params_names_list=["credentials", "api_client"],
                                           reason="None of the arguments were provided.")

        if space_id:
            self._client.set.default_space(space_id)
        elif project_id:
            self._client.set.default_project(project_id)
        elif not api_client:
            raise InvalidMultipleArguments(params_names_list=["space_id", "project_id"],
                                           reason="None of the arguments were provided.")

        if not self._client.CLOUD_PLATFORM_SPACES and self._client.CPD_version < 4.8:
            raise WMLClientError(error_msg="Operation is unsupported for this release.")

        if self.model_id:
            self._inference = FMModelInference(model_id=self.model_id,
                                               params=self.params,
                                               api_client=self._client)
        else:
            self._inference = DeploymentModelInference(deployment_id=self.deployment_id,
                                                       params=self.params,
                                                       api_client=self._client)

        BaseModelInference.__init__(self, __name__, self._client)
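    # A minimal construction sketch (kept as a comment; uncomment to run) for the
    # ``api_client`` path described in the docstring above, as an alternative to
    # passing raw ``credentials``. The credential values and model id below are
    # placeholders; any initialized APIClient with a default project or space set
    # would work the same way.
    #
    #     from ibm_watson_machine_learning import APIClient
    #     from ibm_watson_machine_learning.foundation_models import ModelInference
    #
    #     client = APIClient({"apikey": "***", "url": "https://us-south.ml.cloud.ibm.com"})
    #     client.set.default_project("*****")
    #
    #     # credentials and project_id/space_id are not needed here
    #     model_inference = ModelInference(model_id="google/flan-ul2", api_client=client)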
    def get_details(self):
        """Get model interface's details

        :return: details of model or deployment
        :rtype: dict

        **Example**

        .. code-block:: python

            model_inference.get_details()
        """
        return self._inference.get_details()
    def generate(self,
                 prompt=None,
                 params=None,
                 guardrails=False,
                 guardrails_hap_params=None,
                 guardrails_pii_params=None,
                 concurrency_limit=10,
                 async_mode=False):
        """Given a text prompt as input and parameters, the selected model (model_id) or deployment (deployment_id) generates a completion text as generated_text. For a prompt template deployment, `prompt` should be None.

        :param params: meta props for text generation, use ``ibm_watson_machine_learning.metanames.GenTextParamsMetaNames().show()`` to view the list of MetaNames
        :type params: dict

        :param concurrency_limit: number of requests that will be sent in parallel, max is 10
        :type concurrency_limit: int

        :param prompt: the prompt string or list of strings. If a list of strings is passed, requests will be managed in parallel with the rate of concurrency_limit, defaults to None
        :type prompt: (str | list | None), optional

        :param guardrails: If True, then the potentially hateful, abusive, and/or profane language (HAP) detection filter is toggled on for both the prompt and the generated text, defaults to False
        :type guardrails: bool

        :param guardrails_hap_params: meta props for HAP moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_hap_params: dict

        :param guardrails_pii_params: meta props for PII moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_pii_params: dict

        :param async_mode: If True, then results are yielded asynchronously (using a generator). In this case both the prompt and the generated text will be concatenated in the final response - under `generated_text`, defaults to False
        :type async_mode: bool

        :return: scoring result containing generated content
        :rtype: dict

        **Example**

        .. code-block:: python

            q = "What is 1 + 1?"
            generated_response = model_inference.generate(prompt=q)
            print(generated_response['results'][0]['generated_text'])
        """
        self._validate_type(params, 'params', dict, False)
        self._validate_type(concurrency_limit, 'concurrency_limit', [int, float], False, raise_error_for_list=True)
        if isinstance(concurrency_limit, float):  # convert float (e.g. 10.0) to int
            concurrency_limit = int(concurrency_limit)

        if concurrency_limit > 10 or concurrency_limit < 1:
            raise ParamOutOfRange(param_name='concurrency_limit', value=concurrency_limit, min=1, max=10)

        return self._inference.generate(prompt=prompt,
                                        params=params,
                                        guardrails=guardrails,
                                        guardrails_hap_params=guardrails_hap_params,
                                        guardrails_pii_params=guardrails_pii_params,
                                        concurrency_limit=concurrency_limit,
                                        async_mode=async_mode)
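    # A usage sketch (kept as a comment; uncomment to run) for the list-of-prompts
    # path documented above: when ``prompt`` is a list, requests are sent in
    # parallel at the rate of ``concurrency_limit`` and the result is a list with
    # one response per prompt. ``model_inference`` is assumed to be an instance
    # constructed as in the class example.
    #
    #     prompts = ["What is 1 + 1?", "What is 2 + 2?", "What is 3 + 3?"]
    #     responses = model_inference.generate(prompt=prompts, concurrency_limit=5)
    #
    #     for response in responses:
    #         print(response['results'][0]['generated_text'])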
    def generate_text(self,
                      prompt=None,
                      params=None,
                      guardrails=False,
                      guardrails_hap_params=None,
                      guardrails_pii_params=None,
                      raw_response=False,
                      concurrency_limit=10):
        """Given a text prompt as input and parameters, the selected model (model_id) generates a completion text as generated_text. For a prompt template deployment, `prompt` should be None.

        :param params: meta props for text generation, use ``ibm_watson_machine_learning.metanames.GenTextParamsMetaNames().show()`` to view the list of MetaNames
        :type params: dict

        :param concurrency_limit: number of requests that will be sent in parallel, max is 10
        :type concurrency_limit: int

        :param prompt: the prompt string or list of strings. If a list of strings is passed, requests will be managed in parallel with the rate of concurrency_limit, defaults to None
        :type prompt: (str | list | None), optional

        :param guardrails: If True, then the potentially hateful, abusive, and/or profane language (HAP) detection filter is toggled on for both the prompt and the generated text, defaults to False. If HAP is detected, a `HAPDetectionWarning` is issued
        :type guardrails: bool

        :param guardrails_hap_params: meta props for HAP moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_hap_params: dict

        :param guardrails_pii_params: meta props for PII moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_pii_params: dict

        :param raw_response: return the whole response object
        :type raw_response: bool, optional

        :return: generated content
        :rtype: str

        .. note::
            By default only the first occurrence of `HAPDetectionWarning` is displayed. To enable printing all warnings of this category, use:

            .. code-block:: python

                import warnings
                from ibm_watson_machine_learning.foundation_models.utils import HAPDetectionWarning

                warnings.filterwarnings("always", category=HAPDetectionWarning)

        **Example**

        .. code-block:: python

            q = "What is 1 + 1?"
            generated_text = model_inference.generate_text(prompt=q)
            print(generated_text)
        """
        metadata = self.generate(prompt=prompt,
                                 params=params,
                                 guardrails=guardrails,
                                 guardrails_hap_params=guardrails_hap_params,
                                 guardrails_pii_params=guardrails_pii_params,
                                 concurrency_limit=concurrency_limit)
        if raw_response:
            return metadata
        else:
            if isinstance(prompt, list):
                return [self._return_guardrails_stats(single_response)['generated_text']
                        for single_response in metadata]
            else:
                return self._return_guardrails_stats(metadata)['generated_text']
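    # A sketch (kept as a comment; uncomment to run) of the guardrails path: with
    # ``guardrails=True`` the HAP filter is applied to both the prompt and the
    # generated text, and a ``HAPDetectionWarning`` is issued on detection. The
    # meta prop used below is illustrative; use
    # ``GenTextModerationsMetaNames().show()`` to view the actual list of MetaNames.
    #
    #     from ibm_watson_machine_learning.metanames import GenTextModerationsMetaNames
    #
    #     hap_params = {GenTextModerationsMetaNames.THRESHOLD: 0.75}
    #
    #     text = model_inference.generate_text(prompt="What is 1 + 1?",
    #                                          guardrails=True,
    #                                          guardrails_hap_params=hap_params)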
    def generate_text_stream(self,
                             prompt=None,
                             params=None,
                             raw_response=False,
                             guardrails=False,
                             guardrails_hap_params=None,
                             guardrails_pii_params=None):
        """Given a text prompt as input and parameters, the selected model (model_id) generates streamed text as generate_text_stream. For a prompt template deployment, `prompt` should be None.

        :param params: meta props for text generation, use ``ibm_watson_machine_learning.metanames.GenTextParamsMetaNames().show()`` to view the list of MetaNames
        :type params: dict

        :param prompt: the prompt string, defaults to None
        :type prompt: str, optional

        :param raw_response: yields the whole response object
        :type raw_response: bool, optional

        :param guardrails: If True, then the potentially hateful, abusive, and/or profane language (HAP) detection filter is toggled on for both the prompt and the generated text, defaults to False. If HAP is detected, a `HAPDetectionWarning` is issued
        :type guardrails: bool

        :param guardrails_hap_params: meta props for HAP moderations, use ``ibm_watson_machine_learning.metanames.GenTextModerationsMetaNames().show()`` to view the list of MetaNames
        :type guardrails_hap_params: dict

        :return: scoring result containing generated content
        :rtype: generator

        .. note::
            By default only the first occurrence of `HAPDetectionWarning` is displayed. To enable printing all warnings of this category, use:

            .. code-block:: python

                import warnings
                from ibm_watson_machine_learning.foundation_models.utils import HAPDetectionWarning

                warnings.filterwarnings("always", category=HAPDetectionWarning)

        **Example**

        .. code-block:: python

            q = "Write an epigram about the sun"
            generated_response = model_inference.generate_text_stream(prompt=q)

            for chunk in generated_response:
                print(chunk, end='')
        """
        self._validate_type(params, 'params', dict, False)

        return self._inference.generate_text_stream(prompt=prompt,
                                                    params=params,
                                                    raw_response=raw_response,
                                                    guardrails=guardrails,
                                                    guardrails_hap_params=guardrails_hap_params,
                                                    guardrails_pii_params=guardrails_pii_params)
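    # A sketch (kept as a comment; uncomment to run) of the ``raw_response=True``
    # variant: each streamed chunk is then a response object rather than a bare
    # string, so the text has to be pulled out explicitly. The field access below
    # assumes each chunk mirrors the ``generate`` response layout shown earlier in
    # this module.
    #
    #     stream = model_inference.generate_text_stream(prompt="Write an epigram about the sun",
    #                                                   raw_response=True)
    #
    #     for chunk in stream:
    #         print(chunk['results'][0]['generated_text'], end='')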
    def tokenize(self, prompt=None, return_tokens: bool = False):
        """
        The text tokenize operation allows you to check the conversion of provided input to tokens for a given model. It splits text into words or sub-words, which are then converted to IDs through a look-up table (vocabulary). Tokenization allows the model to have a reasonable vocabulary size.

        .. note::
            The method is not supported for deployments; it is available only for base models.

        :param prompt: the prompt string, defaults to None
        :type prompt: str, optional

        :param return_tokens: the parameter for text tokenization, defaults to False
        :type return_tokens: bool

        :return: the result of tokenizing the input string
        :rtype: dict

        **Example**

        .. code-block:: python

            q = "Write an epigram about the moon"
            tokenized_response = model_inference.tokenize(prompt=q, return_tokens=True)
            print(tokenized_response["result"])
        """
        return self._inference.tokenize(prompt=prompt, return_tokens=return_tokens)
    def to_langchain(self):
        """
        :return: WatsonxLLM wrapper for watsonx foundation models
        :rtype: WatsonxLLM

        **Example**

        .. code-block:: python

            from langchain import PromptTemplate
            from langchain.chains import LLMChain
            from ibm_watson_machine_learning.foundation_models import ModelInference
            from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes

            flan_ul2_model = ModelInference(
                model_id=ModelTypes.FLAN_UL2,
                credentials={
                    "apikey": "***",
                    "url": "https://us-south.ml.cloud.ibm.com"
                },
                project_id="*****"
            )

            prompt_template = "What color is the {flower}?"

            llm_chain = LLMChain(llm=flan_ul2_model.to_langchain(), prompt=PromptTemplate.from_template(prompt_template))
            llm_chain('sunflower')

        .. code-block:: python

            from langchain import PromptTemplate
            from langchain.chains import LLMChain
            from ibm_watson_machine_learning.foundation_models import ModelInference
            from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes

            deployed_model = ModelInference(
                deployment_id="<ID of deployed model>",
                credentials={
                    "apikey": "***",
                    "url": "https://us-south.ml.cloud.ibm.com"
                },
                space_id="*****"
            )

            prompt_template = "What color is the {car}?"

            llm_chain = LLMChain(llm=deployed_model.to_langchain(), prompt=PromptTemplate.from_template(prompt_template))
            llm_chain('fiat')
        """
        from ibm_watson_machine_learning.foundation_models.extensions.langchain.llm import WatsonxLLM

        return WatsonxLLM(self)
    def get_identifying_params(self) -> dict:
        """Represent Model Inference's setup in a dictionary"""
        return self._inference.get_identifying_params()
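# A brief usage sketch for the method above (kept as a comment; uncomment to run),
# in the spirit of the docstring examples elsewhere in this module;
# ``model_inference`` is assumed to be an already constructed instance:
#
#     identifying_params = model_inference.get_identifying_params()
#     print(identifying_params)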