Source code for ibm_watsonx_ai.foundation_models.ilab.synthetic_data

#  -----------------------------------------------------------------------------------------
#  (C) Copyright IBM Corp. 2025.
#  https://opensource.org/licenses/BSD-3-Clause
#  -----------------------------------------------------------------------------------------

from __future__ import annotations

import json
from typing import TYPE_CHECKING
import logging

from ibm_watsonx_ai._wrappers import requests
from ibm_watsonx_ai.foundation_models.ilab.helper import wait_for_run_finish, BaseRuns
from ibm_watsonx_ai.foundation_models.ilab.taxonomies import Taxonomy
from ibm_watsonx_ai.helpers.connections import DataConnection
from ibm_watsonx_ai.wml_client_error import WMLClientError
from ibm_watsonx_ai.wml_resource import WMLResource

if TYPE_CHECKING:
    from ibm_watsonx_ai import APIClient


class SyntheticDataGeneration:
    """Class of InstructLab synthetic data generation run.

    :param name: name of the synthetic data generation run
    :type name: str

    :param api_client: client used to build URLs, query parameters and headers
    :type api_client: APIClient
    """

    # Identifier of the run. It defaults to ``None`` (instead of being a bare
    # annotation) so that the "not started" guard below can actually fire on
    # an instance whose id has not been assigned yet.
    id: str | None = None

    def __init__(self, name: str, api_client: APIClient) -> None:
        self.name = name
        self._client = api_client
        self._href_definitions = self._client.service_instance._href_definitions

    def _run_href(self) -> str:
        """Return the URL of this run.

        :raises WMLClientError: if no run id has been assigned yet

        :return: href of this synthetic data generation run
        :rtype: str
        """
        if self.id is None:
            raise WMLClientError("Run is not started, operation cannot be performed.")
        return self._href_definitions.get_synthetic_data_generation_href(self.id)

    def get_results_reference(self) -> DataConnection:
        """Get results reference to generated synthetic data.

        :raises WMLClientError: if no run id has been assigned yet

        :returns: data connection to generated synthetic data
        :rtype: DataConnection
        """
        return DataConnection.from_dict(
            self.get_run_details()["entity"]["results_reference"]
        )

    def get_run_details(self) -> dict:
        """Get synthetic data generation details

        :raises WMLClientError: if no run id has been assigned yet

        :return: details of synthetic data generation
        :rtype: dict
        """
        response = requests.get(
            url=self._run_href(),
            params=self._client._params(),
            headers=self._client._get_headers(),
        )
        return self._client.repository._handle_response(
            200, "getting synthetic data generation details", response
        )

    def get_run_status(self) -> str:
        """Get synthetic data generation status

        The state is read from ``entity.status.state`` of the run details;
        ``None`` is returned when that field is absent.

        :return: status of synthetic data generation
        :rtype: str
        """
        return self.get_run_details()["entity"].get("status", {}).get("state")

    def delete_run(self) -> str:
        """Delete synthetic data generation run

        Sends a DELETE request with ``hard_delete=true`` added to the query
        parameters.

        :raises WMLClientError: if no run id has been assigned yet

        :return: result of handling the (non-JSON) 204 response
        :rtype: str
        """
        url = self._run_href()
        params = self._client._params()
        params["hard_delete"] = "true"
        response = requests.delete(
            url=url,
            params=params,
            headers=self._client._get_headers(),
        )
        return self._client.repository._handle_response(
            204, "deletion of synthetic data generation", response, json_response=False
        )

    def cancel_run(self) -> str:
        """Cancel synthetic data generation run

        Sends the same DELETE request as :meth:`delete_run`, but without the
        ``hard_delete`` query parameter.

        :raises WMLClientError: if no run id has been assigned yet

        :return: result of handling the (non-JSON) 204 response
        :rtype: str
        """
        response = requests.delete(
            url=self._run_href(),
            params=self._client._params(),
            headers=self._client._get_headers(),
        )
        return self._client.repository._handle_response(
            204,
            "cancelation of synthetic data generation",
            response,
            json_response=False,
        )
class SDGRuns(BaseRuns):
    """Class of InstructLab synthetic generation runs."""

    def __init__(self, api_client: APIClient) -> None:
        # The collection endpoint for synthetic data generations is handed to
        # the base class together with the client.
        href_definitions = api_client.service_instance._href_definitions
        super().__init__(
            api_client, href_definitions.get_synthetic_data_generations_href()
        )

    def get_synthetic_data_generation(self, sdg_id: str) -> SyntheticDataGeneration:
        """Get synthetic data generation object

        The run details are fetched for ``sdg_id`` and the run name is taken
        from ``metadata.name`` (``None`` when absent).

        :param sdg_id: id of synthetic data generation object
        :type sdg_id: str

        :returns: synthetic data generation object
        :rtype: SyntheticDataGeneration
        """
        details = self.get_run_details(sdg_id)
        run_name = details.get("metadata", {}).get("name")

        run = SyntheticDataGeneration(run_name, self._client)
        run.id = sdg_id
        return run
class SyntheticData(WMLResource):
    """Class of InstructLab synthetic data generation module.

    :param ilab_tuner_name: name of the tuner, used to build the default run name
    :type ilab_tuner_name: str

    :param api_client: client used to build URLs, query parameters and headers
    :type api_client: APIClient
    """

    _logger = logging.getLogger(__name__)

    def __init__(self, ilab_tuner_name: str, api_client: APIClient) -> None:
        WMLResource.__init__(self, "synthetic data generation", api_client)
        self.ilab_tuner_name = ilab_tuner_name
        self._client = api_client
        self._href_definitions = self._client.service_instance._href_definitions

    def generate(
        self,
        *,
        name: str | None = None,
        taxonomy: Taxonomy,
        background_mode: bool = False,
    ) -> SyntheticDataGeneration:
        """Generate synthetic data from updated taxonomy

        :param name: name of synthetic data generation run, defaults to
            ``"<ilab_tuner_name> - Synthetic Data Generation"``
        :type name: str, optional

        :param taxonomy: taxonomy object
        :type taxonomy: Taxonomy

        :param background_mode: indicator if the method will run in the background, async or sync
        :type background_mode: bool, optional

        :returns: synthetic data generation run object
        :rtype: SyntheticDataGeneration
        """
        sdg = SyntheticDataGeneration(
            name if name else f"{self.ilab_tuner_name} - Synthetic Data Generation",
            self._client,
        )

        # The results of the taxonomy import are the input data of the run.
        taxonomy_import_details = taxonomy.get_taxonomy_import().get_run_details()
        payload = {
            "name": sdg.name,
            "data_reference": taxonomy_import_details["entity"]["results_reference"],
        }

        # The project/space scope is sent in the request body, so it is removed
        # from the query parameters. ``pop`` gets a default so that a parameter
        # dict without that key does not raise ``KeyError``.
        params = self._client._params()
        if self._client.default_project_id:
            payload["project_id"] = self._client.default_project_id
            params.pop("project_id", None)
        elif self._client.default_space_id:
            payload["space_id"] = self._client.default_space_id
            params.pop("space_id", None)

        response = requests.post(
            url=self._href_definitions.get_synthetic_data_generations_href(),
            json=payload,
            params=params,
            headers=self._client._get_headers(),
        )
        res = self._handle_response(201, "running synthetic data generation", response)
        sdg.id = res["metadata"]["id"]

        # In synchronous mode block until the run finishes.
        if not background_mode:
            wait_for_run_finish(
                asked_object=sdg,
                res_name="Synthetic data generation",
                logger=self._logger,
            )

        return sdg

    def runs(self) -> SDGRuns:
        """Get the historical runs.

        :returns: runs object
        :rtype: SDGRuns
        """
        return SDGRuns(self._client)