# -----------------------------------------------------------------------------------------
# (C) Copyright IBM Corp. 2023-2025.
# https://opensource.org/licenses/BSD-3-Clause
# -----------------------------------------------------------------------------------------
from __future__ import annotations
from datetime import datetime
from pandas import DataFrame
from ibm_watsonx_ai.foundation_models.ilab_tuner import ILabTuner
from ibm_watsonx_ai.foundation_models.prompt_tuner import PromptTuner
from ibm_watsonx_ai.foundation_models.fine_tuner import FineTuner
from ibm_watsonx_ai.wml_client_error import (
WMLClientError,
ApiRequestFailure,
)
from ibm_watsonx_ai.foundation_models.utils.utils import (
_is_fine_tuning_endpoint_available,
)
from ibm_watsonx_ai import APIClient
__all__ = ["TuneRuns"]
[docs]
class TuneRuns:
"""The TuneRuns class is used to work with historical PromptTuner and FineTuner runs.
:param client: APIClient to handle service operations
:type client: APIClient
:param filter: filter, choose which runs specifying the tuning name to fetch
:type filter: str, optional
:param limit: int number of records to be returned
:type limit: int
"""
def __init__(
self, client: APIClient, filter: str | None = None, limit: int = 50
) -> None:
self.client = client
self.tuning_name = filter
self.limit = limit
self._is_fine_tuning_endpoint_available = _is_fine_tuning_endpoint_available(
self.client
)
def __call__(self, *, filter: str | None = None, limit: int = 50) -> TuneRuns:
self.tuning_name = filter
self.limit = limit
return self
[docs]
def list(self) -> DataFrame:
"""Lists historical runs with their status. If you have a lot of runs stored in the service,
it might take a longer time to fetch all the information. If there is no limit set, it
gets the last 50 records.
:return: Pandas DataFrame with run IDs and status
:rtype: pandas.DataFrame
**Examples**
.. code-block:: python
from ibm_watsonx_ai.experiment import TuneExperiment
experiment = TuneExperiment(...)
df = experiment.runs.list()
"""
columns = ["timestamp", "run_id", "state", "tuning name"]
pt_runs_details = self.client.training.get_details(
get_all=True if self.tuning_name else False,
limit=None if self.tuning_name else self.limit,
training_type="prompt_tuning",
_internal=True,
)
records: list = []
for run in pt_runs_details["resources"]:
if len(records) >= self.limit:
break
if {"entity", "metadata"}.issubset(run.keys()):
timestamp = run["metadata"].get("modified_at")
run_id = run["metadata"].get("id", run["metadata"].get("guid"))
state = run["entity"].get("status", {}).get("state")
tuning_name = run["entity"].get("name", "Unknown")
record = [timestamp, run_id, state, tuning_name]
if self.tuning_name is None or (
self.tuning_name and self.tuning_name == tuning_name
):
records.append(record)
if self._is_fine_tuning_endpoint_available:
ft_runs_details = self.client.training.get_details(
get_all=True if self.tuning_name else False,
limit=None if self.tuning_name else self.limit,
_internal=True,
_is_fine_tuning=True,
)["resources"]
ilabt_runs_details = self.client.training.get_details(
get_all=True if self.tuning_name else False,
limit=None if self.tuning_name else self.limit,
training_type="ilab",
_internal=True,
_is_fine_tuning=True,
)["resources"]
ft_runs_details.extend(ilabt_runs_details)
for run in ft_runs_details:
if len(records) >= self.limit:
break
if {"entity", "metadata"}.issubset(run.keys()):
timestamp = run["metadata"].get("modified_at")
run_id = run["metadata"].get("id", run["metadata"].get("guid"))
state = run["entity"].get("status", {}).get("state")
tuning_name = run["metadata"].get("name", "Unknown")
record = [timestamp, run_id, state, tuning_name]
if self.tuning_name is None or (
self.tuning_name and self.tuning_name == tuning_name
):
records.append(record)
runs = DataFrame(data=records, columns=columns)
return runs.sort_values(by=["timestamp"], ascending=False)
[docs]
def get_tuner(self, run_id: str) -> PromptTuner | FineTuner | ILabTuner:
"""Create an instance of PromptTuner or FineTuner or ILabTuner based on a tuning run with a specific run_id.
:param run_id: ID of the run
:type run_id: str
:return: prompt tuner | fine | ilab tuner object
:rtype: PromptTuner | FineTuner | ILabTuner class instance
**Example:**
.. code-block:: python
from ibm_watsonx_ai.experiment import TuneExperiment
experiment = TuneExperiment(credentials, ...)
historical_tuner = experiment.runs.get_tuner(run_id='02bab973-ae83-4283-9d73-87b9fd462d35')
"""
# note: normal scenario
if not isinstance(run_id, str):
raise WMLClientError(
f"Provided run_id type was {type(run_id)} (should be a string)"
)
if self._is_fine_tuning_endpoint_available:
try:
tuning_details = self.client.training.get_details(
run_id, _is_fine_tuning=True
)
entity = tuning_details.get("entity", {})
tuning_type = (
"ilab_tuning" if entity.get("type") == "ilab" else "fine_tuning"
)
except ApiRequestFailure:
tuning_details = self.client.training.get_details(run_id)
entity = tuning_details.get("entity")
tuning_type = "prompt_tuning"
else:
tuning_details = self.client.training.get_details(run_id)
entity = tuning_details.get("entity")
tuning_type = "prompt_tuning"
if not entity:
raise WMLClientError("Provided run_id was invalid")
tuner: PromptTuner | FineTuner | ILabTuner
match tuning_type:
case "prompt_tuning":
tuning_params = entity["prompt_tuning"]
tuner = PromptTuner(
name=entity.get("name"),
task_id=tuning_params.get("task_id"),
description=entity.get("description"),
base_model=tuning_params.get("base_model", {}).get("name"),
accumulate_steps=tuning_params.get("accumulate_steps"),
batch_size=tuning_params.get("batch_size"),
init_method=tuning_params.get("init_method"),
init_text=tuning_params.get("init_text"),
learning_rate=tuning_params.get("learning_rate"),
max_input_tokens=tuning_params.get("max_input_tokens"),
max_output_tokens=tuning_params.get("max_output_tokens"),
num_epochs=tuning_params.get("num_epochs"),
tuning_type=tuning_params.get("tuning_type"),
verbalizer=tuning_params.get("verbalizer"),
auto_update_model=entity.get("auto_update_model"),
)
tuner._client = self.client
case "ilab_tuning":
tuner = ILabTuner(tuning_details["metadata"].get("name"), self.client)
case "fine_tuning":
tuning_params = entity["parameters"]
tuner = FineTuner(
name=tuning_details["metadata"].get("name"),
task_id=tuning_params.get("task_id"),
description=tuning_details["metadata"].get("description"),
base_model=tuning_params.get("base_model", {}).get("model_id"),
num_epochs=tuning_params.get("num_epochs"),
learning_rate=tuning_params.get("learning_rate"),
batch_size=tuning_params.get("batch_size"),
max_seq_length=tuning_params.get("max_seq_length"),
accumulate_steps=tuning_params.get("accumulate_steps"),
verbalizer=tuning_params.get("verbalizer"),
response_template=tuning_params.get("response_template"),
gpu=tuning_params.get("gpu"),
auto_update_model=entity.get("auto_update_model"),
api_client=self.client,
)
case _:
raise WMLClientError("Not supported tuning type")
tuner.id = run_id
return tuner
[docs]
def get_run_details(
self, run_id: str | None = None, include_metrics: bool = False
) -> dict:
"""Get run details. If run_id is not supplied, the last run will be taken.
:param run_id: ID of the run
:type run_id: str, optional
:param include_metrics: indicates to include metrics in the training details output
:type include_metrics: bool, optional
:return: configuration parameters of the run
:rtype: dict
**Example:**
.. code-block:: python
from ibm_watsonx_ai.experiment import TuneExperiment
experiment = TuneExperiment(credentials, ...)
experiment.runs.get_run_details(run_id='02bab973-ae83-4283-9d73-87b9fd462d35')
experiment.runs.get_run_details()
"""
if run_id is None:
if self._is_fine_tuning_endpoint_available:
try:
resources: list = self.client.training.get_details( # type: ignore[assignment]
limit=1,
_internal=True,
_is_fine_tuning=True,
).get(
"resources"
)
resources.extend(
self.client.training.get_details(
limit=1,
training_type="ilab",
_internal=True,
_is_fine_tuning=True,
).get("resources", [])
)
except ApiRequestFailure:
resources = self.client.training.get_details( # type: ignore[assignment]
limit=1, training_type="prompt_tuning", _internal=True
).get(
"resources"
)
else:
resources.extend(
self.client.training.get_details( # type: ignore[arg-type]
limit=1, training_type="prompt_tuning", _internal=True
).get("resources")
)
else:
resources = self.client.training.get_details( # type: ignore[assignment]
limit=1, training_type="prompt_tuning", _internal=True
).get(
"resources"
)
if len(resources) == 1:
details = resources[0]
elif len(resources) >= 2:
timestamps = {}
for i, r in enumerate(resources):
try:
timestamps[i] = datetime.fromisoformat(
r["metadata"]["modified_at"].replace("Z", "")
)
except KeyError:
timestamps[i] = datetime.fromisoformat(
r["metadata"]["created_at"].replace("Z", "")
)
details = resources[max(timestamps, key=timestamps.__getitem__)]
else:
raise WMLClientError("There is no available training run to retrieve.")
else:
if self._is_fine_tuning_endpoint_available:
try:
details = self.client.training.get_details(
training_id=run_id, _internal=True, _is_fine_tuning=True
)
except ApiRequestFailure:
details = self.client.training.get_details(
training_id=run_id, _internal=True
)
else:
details = self.client.training.get_details(
training_id=run_id, _internal=True
)
if include_metrics:
return details
if details["entity"]["status"].get("metrics", False):
del details["entity"]["status"]["metrics"]
return details