Evaluation Result

pydantic model ibm_watsonx_gov.entities.evaluation_result.AgentMetricResult

Bases: BaseMetricResult

This is the data model for metric results in the agentic app. It stores evaluation results for conversations, interactions and nodes.

Show JSON schema
{
   "title": "AgentMetricResult",
   "description": "This is the data model for metric results in the agentic app.\nIt stores evaluation results for conversations, interactions and nodes.",
   "type": "object",
   "properties": {
      "name": {
         "description": "The name of the metric.",
         "examples": [
            "answer_correctness",
            "context_relevance"
         ],
         "title": "Name",
         "type": "string"
      },
      "method": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The method used to compute this metric result.",
         "examples": [
            "token_recall"
         ],
         "title": "Method"
      },
      "provider": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The provider used to compute this metric result.",
         "title": "Provider"
      },
      "value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "string"
            },
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "description": "The metric value.",
         "title": "Value"
      },
      "errors": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/Error"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The list of error messages",
         "title": "Errors"
      },
      "additional_info": {
         "anyOf": [
            {
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The additional information about the metric result.",
         "title": "Additional Info"
      },
      "group": {
         "anyOf": [
            {
               "$ref": "#/$defs/MetricGroup"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The metric group"
      },
      "thresholds": {
         "default": [],
         "description": "The metric thresholds",
         "items": {
            "$ref": "#/$defs/MetricThreshold"
         },
         "title": "Thresholds",
         "type": "array"
      },
      "id": {
         "description": "The unique identifier for the metric result record. UUID.",
         "title": "Id",
         "type": "string"
      },
      "ts": {
         "description": "The timestamp when the metric was recorded.",
         "format": "date-time",
         "title": "Ts",
         "type": "string"
      },
      "applies_to": {
         "description": "The type of component the metric result applies to.",
         "enum": [
            "conversation",
            "interaction",
            "node"
         ],
         "title": "Applies To",
         "type": "string"
      },
      "interaction_id": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "description": "The ID of the interaction being evaluated.",
         "title": "Interaction Id"
      },
      "interaction_ts": {
         "anyOf": [
            {
               "format": "date-time",
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The timestamp of the interaction being evaluated.",
         "title": "Interaction Ts"
      },
      "conversation_id": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The ID of the conversation containing the interaction.",
         "title": "Conversation Id"
      },
      "node_name": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The name of the node being evaluated.",
         "title": "Node Name"
      },
      "execution_count": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The execution count of the node in an interaction.",
         "title": "Execution count"
      },
      "execution_order": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The execution order number in the sequence of nodes executed in an interaction.",
         "title": "Execution order"
      }
   },
   "$defs": {
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      }
   },
   "required": [
      "name",
      "value",
      "applies_to",
      "interaction_id"
   ]
}

Config:
  • arbitrary_types_allowed: bool = True

  • use_enum_values: bool = True

Fields:
field applies_to: Annotated[Literal['conversation', 'interaction', 'node'], FieldInfo(annotation=NoneType, required=True, description='The type of component the metric result applies to.')] [Required]

The type of component the metric result applies to.

field conversation_id: Annotated[str | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The ID of the conversation containing the interaction.')] = None

The ID of the conversation containing the interaction.

field execution_count: Annotated[int | None, FieldInfo(annotation=NoneType, required=False, default=None, title='Execution count', description='The execution count of the node in an interaction.')] = None

The execution count of the node in an interaction.

field execution_order: Annotated[int | None, FieldInfo(annotation=NoneType, required=False, default=None, title='Execution order', description='The execution order number in the sequence of nodes executed in an interaction.')] = None

The execution order number in the sequence of nodes executed in an interaction.

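field id: Annotated[str, FieldInfo(annotation=NoneType, required=False, default_factory=<factory>, description='The unique identifier for the metric result record. UUID.')] [Optional]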

The unique identifier for the metric result record. UUID.

field interaction_id: Annotated[str | None, FieldInfo(annotation=NoneType, required=True, description='The ID of the interaction being evaluated.')] [Required]

The ID of the interaction being evaluated.

field interaction_ts: Annotated[datetime | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The timestamp of the interaction being evaluated.')] = None

The timestamp of the interaction being evaluated.

field node_name: Annotated[str | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The name of the node being evaluated.')] = None

The name of the node being evaluated.

field ts: Annotated[datetime, FieldInfo(annotation=NoneType, required=False, default_factory=now, description='The timestamp when the metric was recorded.')] [Optional]

The timestamp when the metric was recorded.

pydantic model ibm_watsonx_gov.entities.evaluation_result.AgenticEvaluationResult

Bases: BaseModel

Show JSON schema
{
   "title": "AgenticEvaluationResult",
   "type": "object",
   "properties": {
      "metrics_results": {
         "description": "The list of metrics result.",
         "items": {
            "$ref": "#/$defs/AgentMetricResult"
         },
         "title": "Metrics result",
         "type": "array"
      },
      "aggregated_metrics_results": {
         "description": "The list of aggregated metrics result. The metrics are aggregated for each node in the agent.",
         "items": {
            "$ref": "#/$defs/AggregateAgentMetricResult"
         },
         "title": "Aggregated metrics result",
         "type": "array"
      }
   },
   "$defs": {
      "AgentMetricResult": {
         "description": "This is the data model for metric results in the agentic app.\nIt stores evaluation results for conversations, interactions and nodes.",
         "properties": {
            "name": {
               "description": "The name of the metric.",
               "examples": [
                  "answer_correctness",
                  "context_relevance"
               ],
               "title": "Name",
               "type": "string"
            },
            "method": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The method used to compute this metric result.",
               "examples": [
                  "token_recall"
               ],
               "title": "Method"
            },
            "provider": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The provider used to compute this metric result.",
               "title": "Provider"
            },
            "value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "string"
                  },
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The metric value.",
               "title": "Value"
            },
            "errors": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/Error"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The list of error messages",
               "title": "Errors"
            },
            "additional_info": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The additional information about the metric result.",
               "title": "Additional Info"
            },
            "group": {
               "anyOf": [
                  {
                     "$ref": "#/$defs/MetricGroup"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The metric group"
            },
            "thresholds": {
               "default": [],
               "description": "The metric thresholds",
               "items": {
                  "$ref": "#/$defs/MetricThreshold"
               },
               "title": "Thresholds",
               "type": "array"
            },
            "id": {
               "description": "The unique identifier for the metric result record. UUID.",
               "title": "Id",
               "type": "string"
            },
            "ts": {
               "description": "The timestamp when the metric was recorded.",
               "format": "date-time",
               "title": "Ts",
               "type": "string"
            },
            "applies_to": {
               "description": "The type of component the metric result applies to.",
               "enum": [
                  "conversation",
                  "interaction",
                  "node"
               ],
               "title": "Applies To",
               "type": "string"
            },
            "interaction_id": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The ID of the interaction being evaluated.",
               "title": "Interaction Id"
            },
            "interaction_ts": {
               "anyOf": [
                  {
                     "format": "date-time",
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The timestamp of the interaction being evaluated.",
               "title": "Interaction Ts"
            },
            "conversation_id": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The ID of the conversation containing the interaction.",
               "title": "Conversation Id"
            },
            "node_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The name of the node being evaluated.",
               "title": "Node Name"
            },
            "execution_count": {
               "anyOf": [
                  {
                     "type": "integer"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The execution count of the node in an interaction.",
               "title": "Execution count"
            },
            "execution_order": {
               "anyOf": [
                  {
                     "type": "integer"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The execution order number in the sequence of nodes executed in an interaction.",
               "title": "Execution order"
            }
         },
         "required": [
            "name",
            "value",
            "applies_to",
            "interaction_id"
         ],
         "title": "AgentMetricResult",
         "type": "object"
      },
      "AggregateAgentMetricResult": {
         "properties": {
            "name": {
               "description": "The name of the metric.",
               "examples": [
                  "answer_correctness",
                  "context_relevance"
               ],
               "title": "Name",
               "type": "string"
            },
            "method": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The method used to compute this metric result.",
               "examples": [
                  "token_recall"
               ],
               "title": "Method"
            },
            "provider": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The provider used to compute this metric result.",
               "title": "Provider"
            },
            "value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The value of the metric. Defaults to mean.",
               "title": "Value"
            },
            "errors": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/Error"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The list of error messages",
               "title": "Errors"
            },
            "additional_info": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The additional information about the metric result.",
               "title": "Additional Info"
            },
            "group": {
               "anyOf": [
                  {
                     "$ref": "#/$defs/MetricGroup"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The metric group"
            },
            "thresholds": {
               "default": [],
               "description": "The metric thresholds",
               "items": {
                  "$ref": "#/$defs/MetricThreshold"
               },
               "title": "Thresholds",
               "type": "array"
            },
            "min": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The minimum value of the metric.",
               "title": "Min"
            },
            "max": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The maximum value of the metric.",
               "title": "Max"
            },
            "mean": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The mean value of the metric.",
               "title": "Mean"
            },
            "count": {
               "anyOf": [
                  {
                     "type": "integer"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The count for metric results used for aggregation.",
               "title": "Count"
            },
            "node_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The name of the node being evaluated.",
               "title": "Node Name"
            },
            "applies_to": {
               "description": "The type of component the metric result applies to.",
               "enum": [
                  "conversation",
                  "interaction",
                  "node"
               ],
               "title": "Applies To",
               "type": "string"
            },
            "individual_results": {
               "default": [],
               "description": "The list individual metric results.",
               "items": {
                  "$ref": "#/$defs/AgentMetricResult"
               },
               "title": "Individual Results",
               "type": "array"
            }
         },
         "required": [
            "name",
            "applies_to"
         ],
         "title": "AggregateAgentMetricResult",
         "type": "object"
      },
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      }
   },
   "required": [
      "metrics_results",
      "aggregated_metrics_results"
   ]
}

Fields:
field aggregated_metrics_results: Annotated[List[AggregateAgentMetricResult], FieldInfo(annotation=NoneType, required=True, title='Aggregated metrics result', description='The list of aggregated metrics result. The metrics are aggregated for each node in the agent.')] [Required]

The list of aggregated metrics result. The metrics are aggregated for each node in the agent.

field metrics_results: Annotated[List[AgentMetricResult], FieldInfo(annotation=NoneType, required=True, title='Metrics result', description='The list of metrics result.')] [Required]

The list of metrics result.

get_aggregated_metrics_results(applies_to: list[Literal['conversation', 'interaction', 'node']] = ['conversation', 'interaction', 'node'], node_name: str | None = None, include_individual_results: bool = True, format: Literal['json', 'object'] = 'json', **kwargs) list[AggregateAgentMetricResult] | list[dict]

Get the aggregated agentic metrics results based on the specified arguments.

Parameters:
  • applies_to (list[Literal["conversation", "interaction", "node"]], optional) – The types of components whose aggregated metric results should be returned. Defaults to [“conversation”, “interaction”, “node”].

  • node_name (str, optional) – The name of the node to get the aggregated results for. Defaults to None.

  • include_individual_results (bool, optional) – Whether to return the individual metrics results. Defaults to True.

  • format (Literal["json", "object"], optional) – The format of the output. Defaults to “json”.

Returns:

The aggregated metrics results that match the specified arguments.

Return type:

list[AggregateAgentMetricResult] | list[dict]

get_metrics_results(applies_to: list[Literal['conversation', 'interaction', 'node']] = ['conversation', 'interaction', 'node'], node_name: str | None = None, format: Literal['json', 'object'] = 'json', **kwargs) list[AgentMetricResult] | list[dict]

Get the agentic metrics results based on the specified arguments.

Parameters:
  • applies_to (list[Literal["conversation", "interaction", "node"]], optional) – The types of components whose metrics results should be returned. Defaults to [“conversation”, “interaction”, “node”].

  • node_name (str, optional) – The name of the node to get the metrics results for. Defaults to None.

  • format (Literal["json", "object"], optional) – The format of the output. Defaults to “json”.

Returns:

The metrics results that match the specified arguments.

Return type:

list[AgentMetricResult] | list[dict]

to_df(input_data: DataFrame | None = None, interaction_id_field: str = 'interaction_id', wide_format: bool = True) DataFrame

Get individual metrics dataframe.

If the input dataframe is provided, it will be merged with the metrics dataframe.

Parameters:
  • input_data (Optional[pd.DataFrame], optional) – Input data to merge with metrics dataframe. Defaults to None.

  • interaction_id_field (str, optional) – Field to use for merging input data and metrics dataframe. Defaults to “interaction_id”.

  • wide_format (bool, optional) – Determines whether to display the results in a pivot table format. Defaults to True.

Returns:

Metrics dataframe.

Return type:

pd.DataFrame

to_json(**kwargs) dict

Get the AgenticEvaluationResult as a JSON-compatible dictionary.

Returns:

The AgenticEvaluationResult serialized as a dictionary.

Return type:

dict

pydantic model ibm_watsonx_gov.entities.evaluation_result.AggregateAgentMetricResult

Bases: BaseMetricResult

Show JSON schema
{
   "title": "AggregateAgentMetricResult",
   "type": "object",
   "properties": {
      "name": {
         "description": "The name of the metric.",
         "examples": [
            "answer_correctness",
            "context_relevance"
         ],
         "title": "Name",
         "type": "string"
      },
      "method": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The method used to compute this metric result.",
         "examples": [
            "token_recall"
         ],
         "title": "Method"
      },
      "provider": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The provider used to compute this metric result.",
         "title": "Provider"
      },
      "value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The value of the metric. Defaults to mean.",
         "title": "Value"
      },
      "errors": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/Error"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The list of error messages",
         "title": "Errors"
      },
      "additional_info": {
         "anyOf": [
            {
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The additional information about the metric result.",
         "title": "Additional Info"
      },
      "group": {
         "anyOf": [
            {
               "$ref": "#/$defs/MetricGroup"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The metric group"
      },
      "thresholds": {
         "default": [],
         "description": "The metric thresholds",
         "items": {
            "$ref": "#/$defs/MetricThreshold"
         },
         "title": "Thresholds",
         "type": "array"
      },
      "min": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The minimum value of the metric.",
         "title": "Min"
      },
      "max": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The maximum value of the metric.",
         "title": "Max"
      },
      "mean": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The mean value of the metric.",
         "title": "Mean"
      },
      "count": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The count for metric results used for aggregation.",
         "title": "Count"
      },
      "node_name": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The name of the node being evaluated.",
         "title": "Node Name"
      },
      "applies_to": {
         "description": "The type of component the metric result applies to.",
         "enum": [
            "conversation",
            "interaction",
            "node"
         ],
         "title": "Applies To",
         "type": "string"
      },
      "individual_results": {
         "default": [],
         "description": "The list individual metric results.",
         "items": {
            "$ref": "#/$defs/AgentMetricResult"
         },
         "title": "Individual Results",
         "type": "array"
      }
   },
   "$defs": {
      "AgentMetricResult": {
         "description": "This is the data model for metric results in the agentic app.\nIt stores evaluation results for conversations, interactions and nodes.",
         "properties": {
            "name": {
               "description": "The name of the metric.",
               "examples": [
                  "answer_correctness",
                  "context_relevance"
               ],
               "title": "Name",
               "type": "string"
            },
            "method": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The method used to compute this metric result.",
               "examples": [
                  "token_recall"
               ],
               "title": "Method"
            },
            "provider": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The provider used to compute this metric result.",
               "title": "Provider"
            },
            "value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "string"
                  },
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The metric value.",
               "title": "Value"
            },
            "errors": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/Error"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The list of error messages",
               "title": "Errors"
            },
            "additional_info": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The additional information about the metric result.",
               "title": "Additional Info"
            },
            "group": {
               "anyOf": [
                  {
                     "$ref": "#/$defs/MetricGroup"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The metric group"
            },
            "thresholds": {
               "default": [],
               "description": "The metric thresholds",
               "items": {
                  "$ref": "#/$defs/MetricThreshold"
               },
               "title": "Thresholds",
               "type": "array"
            },
            "id": {
               "description": "The unique identifier for the metric result record. UUID.",
               "title": "Id",
               "type": "string"
            },
            "ts": {
               "description": "The timestamp when the metric was recorded.",
               "format": "date-time",
               "title": "Ts",
               "type": "string"
            },
            "applies_to": {
               "description": "The type of component the metric result applies to.",
               "enum": [
                  "conversation",
                  "interaction",
                  "node"
               ],
               "title": "Applies To",
               "type": "string"
            },
            "interaction_id": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The ID of the interaction being evaluated.",
               "title": "Interaction Id"
            },
            "interaction_ts": {
               "anyOf": [
                  {
                     "format": "date-time",
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The timestamp of the interaction being evaluated.",
               "title": "Interaction Ts"
            },
            "conversation_id": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The ID of the conversation containing the interaction.",
               "title": "Conversation Id"
            },
            "node_name": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The name of the node being evaluated.",
               "title": "Node Name"
            },
            "execution_count": {
               "anyOf": [
                  {
                     "type": "integer"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The execution count of the node in an interaction.",
               "title": "Execution count"
            },
            "execution_order": {
               "anyOf": [
                  {
                     "type": "integer"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The execution order number in the sequence of nodes executed in an interaction.",
               "title": "Execution order"
            }
         },
         "required": [
            "name",
            "value",
            "applies_to",
            "interaction_id"
         ],
         "title": "AgentMetricResult",
         "type": "object"
      },
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      }
   },
   "required": [
      "name",
      "applies_to"
   ]
}

Config:
  • arbitrary_types_allowed: bool = True

  • use_enum_values: bool = True

Fields:
field applies_to: Annotated[Literal['conversation', 'interaction', 'node'], FieldInfo(annotation=NoneType, required=True, description='The type of component the metric result applies to.')] [Required]

The type of component the metric result applies to.

field count: Annotated[int | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The count for metric results used for aggregation.')] = None

The count for metric results used for aggregation.

field individual_results: Annotated[list[AgentMetricResult], FieldInfo(annotation=NoneType, required=False, default=[], description='The list individual metric results.')] = []

The list of individual metric results.

field max: Annotated[float | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The maximum value of the metric.')] = None

The maximum value of the metric.

field mean: Annotated[float | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The mean value of the metric.')] = None

The mean value of the metric.

field min: Annotated[float | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The minimum value of the metric.')] = None

The minimum value of the metric.

field node_name: Annotated[str | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The name of the node being evaluated.')] = None

The name of the node being evaluated.

field value: Annotated[float | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The value of the metric. Defaults to mean.')] = None

The value of the metric. Defaults to mean.

pydantic model ibm_watsonx_gov.entities.evaluation_result.AggregateMetricResult

Bases: BaseMetricResult

Show JSON schema
{
   "title": "AggregateMetricResult",
   "type": "object",
   "properties": {
      "name": {
         "description": "The name of the metric.",
         "examples": [
            "answer_correctness",
            "context_relevance"
         ],
         "title": "Name",
         "type": "string"
      },
      "method": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The method used to compute this metric result.",
         "examples": [
            "token_recall"
         ],
         "title": "Method"
      },
      "provider": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The provider used to compute this metric result.",
         "title": "Provider"
      },
      "value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "string"
            },
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "description": "The metric value.",
         "title": "Value"
      },
      "errors": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/Error"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The list of error messages",
         "title": "Errors"
      },
      "additional_info": {
         "anyOf": [
            {
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The additional information about the metric result.",
         "title": "Additional Info"
      },
      "group": {
         "anyOf": [
            {
               "$ref": "#/$defs/MetricGroup"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The metric group"
      },
      "thresholds": {
         "default": [],
         "description": "The metric thresholds",
         "items": {
            "$ref": "#/$defs/MetricThreshold"
         },
         "title": "Thresholds",
         "type": "array"
      },
      "min": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Min"
      },
      "max": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Max"
      },
      "mean": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Mean"
      },
      "total_records": {
         "title": "Total Records",
         "type": "integer"
      },
      "record_level_metrics": {
         "default": [],
         "items": {
            "$ref": "#/$defs/RecordMetricResult"
         },
         "title": "Record Level Metrics",
         "type": "array"
      }
   },
   "$defs": {
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      },
      "RecordMetricResult": {
         "properties": {
            "name": {
               "description": "The name of the metric.",
               "examples": [
                  "answer_correctness",
                  "context_relevance"
               ],
               "title": "Name",
               "type": "string"
            },
            "method": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The method used to compute this metric result.",
               "examples": [
                  "token_recall"
               ],
               "title": "Method"
            },
            "provider": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The provider used to compute this metric result.",
               "title": "Provider"
            },
            "value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "string"
                  },
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The metric value.",
               "title": "Value"
            },
            "errors": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/Error"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The list of error messages",
               "title": "Errors"
            },
            "additional_info": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The additional information about the metric result.",
               "title": "Additional Info"
            },
            "group": {
               "anyOf": [
                  {
                     "$ref": "#/$defs/MetricGroup"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The metric group"
            },
            "thresholds": {
               "default": [],
               "description": "The metric thresholds",
               "items": {
                  "$ref": "#/$defs/MetricThreshold"
               },
               "title": "Thresholds",
               "type": "array"
            },
            "record_id": {
               "description": "The record identifier.",
               "examples": [
                  "record1"
               ],
               "title": "Record Id",
               "type": "string"
            },
            "record_timestamp": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The record timestamp.",
               "examples": [
                  "2025-01-01T00:00:00.000000Z"
               ],
               "title": "Record Timestamp"
            }
         },
         "required": [
            "name",
            "value",
            "record_id"
         ],
         "title": "RecordMetricResult",
         "type": "object"
      }
   },
   "required": [
      "name",
      "value",
      "total_records"
   ]
}

Config:
  • arbitrary_types_allowed: bool = True

  • use_enum_values: bool = True

Fields:
field max: float | None = None
field mean: float | None = None
field min: float | None = None
field record_level_metrics: list[RecordMetricResult] = []
field total_records: int [Required]
pydantic model ibm_watsonx_gov.entities.evaluation_result.MetricsEvaluationResult

Bases: BaseModel

Show JSON schema
{
   "title": "MetricsEvaluationResult",
   "type": "object",
   "properties": {
      "metrics_result": {
         "items": {
            "$ref": "#/$defs/AggregateMetricResult"
         },
         "title": "Metrics Result",
         "type": "array"
      }
   },
   "$defs": {
      "AggregateMetricResult": {
         "properties": {
            "name": {
               "description": "The name of the metric.",
               "examples": [
                  "answer_correctness",
                  "context_relevance"
               ],
               "title": "Name",
               "type": "string"
            },
            "method": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The method used to compute this metric result.",
               "examples": [
                  "token_recall"
               ],
               "title": "Method"
            },
            "provider": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The provider used to compute this metric result.",
               "title": "Provider"
            },
            "value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "string"
                  },
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The metric value.",
               "title": "Value"
            },
            "errors": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/Error"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The list of error messages",
               "title": "Errors"
            },
            "additional_info": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The additional information about the metric result.",
               "title": "Additional Info"
            },
            "group": {
               "anyOf": [
                  {
                     "$ref": "#/$defs/MetricGroup"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The metric group"
            },
            "thresholds": {
               "default": [],
               "description": "The metric thresholds",
               "items": {
                  "$ref": "#/$defs/MetricThreshold"
               },
               "title": "Thresholds",
               "type": "array"
            },
            "min": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "title": "Min"
            },
            "max": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "title": "Max"
            },
            "mean": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "title": "Mean"
            },
            "total_records": {
               "title": "Total Records",
               "type": "integer"
            },
            "record_level_metrics": {
               "default": [],
               "items": {
                  "$ref": "#/$defs/RecordMetricResult"
               },
               "title": "Record Level Metrics",
               "type": "array"
            }
         },
         "required": [
            "name",
            "value",
            "total_records"
         ],
         "title": "AggregateMetricResult",
         "type": "object"
      },
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      },
      "RecordMetricResult": {
         "properties": {
            "name": {
               "description": "The name of the metric.",
               "examples": [
                  "answer_correctness",
                  "context_relevance"
               ],
               "title": "Name",
               "type": "string"
            },
            "method": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The method used to compute this metric result.",
               "examples": [
                  "token_recall"
               ],
               "title": "Method"
            },
            "provider": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The provider used to compute this metric result.",
               "title": "Provider"
            },
            "value": {
               "anyOf": [
                  {
                     "type": "number"
                  },
                  {
                     "type": "string"
                  },
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "description": "The metric value.",
               "title": "Value"
            },
            "errors": {
               "anyOf": [
                  {
                     "items": {
                        "$ref": "#/$defs/Error"
                     },
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The list of error messages",
               "title": "Errors"
            },
            "additional_info": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The additional information about the metric result.",
               "title": "Additional Info"
            },
            "group": {
               "anyOf": [
                  {
                     "$ref": "#/$defs/MetricGroup"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The metric group"
            },
            "thresholds": {
               "default": [],
               "description": "The metric thresholds",
               "items": {
                  "$ref": "#/$defs/MetricThreshold"
               },
               "title": "Thresholds",
               "type": "array"
            },
            "record_id": {
               "description": "The record identifier.",
               "examples": [
                  "record1"
               ],
               "title": "Record Id",
               "type": "string"
            },
            "record_timestamp": {
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "description": "The record timestamp.",
               "examples": [
                  "2025-01-01T00:00:00.000000Z"
               ],
               "title": "Record Timestamp"
            }
         },
         "required": [
            "name",
            "value",
            "record_id"
         ],
         "title": "RecordMetricResult",
         "type": "object"
      }
   },
   "required": [
      "metrics_result"
   ]
}

Fields:
field metrics_result: list[AggregateMetricResult] [Required]
to_df(data: DataFrame | None = None, include_additional_info: bool = False) DataFrame

Transform the metrics evaluation result to a dataframe.

Parameters:
  • data (pd.DataFrame) – the input dataframe; when passed, it will be concatenated to the metrics result

  • include_additional_info (bool) – whether to include additional info in the metrics result

Returns:

A new dataframe combining the input data and the evaluated metrics

Return type:

pd.DataFrame

to_dict() list[dict]

Transform the metrics evaluation result to a list of dicts containing the record-level metrics.

to_json(indent: int | None = None, **kwargs)

Transform the metrics evaluation result to a JSON string. The kwargs are passed to the model_dump_json method of the pydantic model, so any argument supported by pydantic's model_dump_json can be passed.

Parameters:

indent (int, optional) – The indentation level for the json. Defaults to None.

Returns:

The result as a JSON string.

pydantic model ibm_watsonx_gov.entities.evaluation_result.RecordMetricResult

Bases: BaseMetricResult

Show JSON schema
{
   "title": "RecordMetricResult",
   "type": "object",
   "properties": {
      "name": {
         "description": "The name of the metric.",
         "examples": [
            "answer_correctness",
            "context_relevance"
         ],
         "title": "Name",
         "type": "string"
      },
      "method": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The method used to compute this metric result.",
         "examples": [
            "token_recall"
         ],
         "title": "Method"
      },
      "provider": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The provider used to compute this metric result.",
         "title": "Provider"
      },
      "value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "string"
            },
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "description": "The metric value.",
         "title": "Value"
      },
      "errors": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/Error"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The list of error messages",
         "title": "Errors"
      },
      "additional_info": {
         "anyOf": [
            {
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The additional information about the metric result.",
         "title": "Additional Info"
      },
      "group": {
         "anyOf": [
            {
               "$ref": "#/$defs/MetricGroup"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The metric group"
      },
      "thresholds": {
         "default": [],
         "description": "The metric thresholds",
         "items": {
            "$ref": "#/$defs/MetricThreshold"
         },
         "title": "Thresholds",
         "type": "array"
      },
      "record_id": {
         "description": "The record identifier.",
         "examples": [
            "record1"
         ],
         "title": "Record Id",
         "type": "string"
      },
      "record_timestamp": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The record timestamp.",
         "examples": [
            "2025-01-01T00:00:00.000000Z"
         ],
         "title": "Record Timestamp"
      }
   },
   "$defs": {
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      }
   },
   "required": [
      "name",
      "value",
      "record_id"
   ]
}

Config:
  • arbitrary_types_allowed: bool = True

  • use_enum_values: bool = True

Fields:
field record_id: Annotated[str, FieldInfo(annotation=NoneType, required=True, description='The record identifier.', examples=['record1'])] [Required]

The record identifier.

field record_timestamp: Annotated[str | None, FieldInfo(annotation=NoneType, required=False, default=None, description='The record timestamp.', examples=['2025-01-01T00:00:00.000000Z'])] = None

The record timestamp.

pydantic model ibm_watsonx_gov.entities.evaluation_result.ToolMetricResult

Bases: RecordMetricResult

Show JSON schema
{
   "title": "ToolMetricResult",
   "type": "object",
   "properties": {
      "name": {
         "description": "The name of the metric.",
         "examples": [
            "answer_correctness",
            "context_relevance"
         ],
         "title": "Name",
         "type": "string"
      },
      "method": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The method used to compute this metric result.",
         "examples": [
            "token_recall"
         ],
         "title": "Method"
      },
      "provider": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The provider used to compute this metric result.",
         "title": "Provider"
      },
      "value": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "string"
            },
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "description": "The metric value.",
         "title": "Value"
      },
      "errors": {
         "anyOf": [
            {
               "items": {
                  "$ref": "#/$defs/Error"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The list of error messages",
         "title": "Errors"
      },
      "additional_info": {
         "anyOf": [
            {
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The additional information about the metric result.",
         "title": "Additional Info"
      },
      "group": {
         "anyOf": [
            {
               "$ref": "#/$defs/MetricGroup"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The metric group"
      },
      "thresholds": {
         "default": [],
         "description": "The metric thresholds",
         "items": {
            "$ref": "#/$defs/MetricThreshold"
         },
         "title": "Thresholds",
         "type": "array"
      },
      "record_id": {
         "description": "The record identifier.",
         "examples": [
            "record1"
         ],
         "title": "Record Id",
         "type": "string"
      },
      "record_timestamp": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The record timestamp.",
         "examples": [
            "2025-01-01T00:00:00.000000Z"
         ],
         "title": "Record Timestamp"
      },
      "tool_name": {
         "description": "Name of the tool for which this result is computed.",
         "title": "Tool Name",
         "type": "string"
      },
      "execution_count": {
         "default": 1,
         "description": "The execution count for this tool name.",
         "exclusiveMinimum": 0,
         "title": "Execution count",
         "type": "integer"
      }
   },
   "$defs": {
      "Error": {
         "properties": {
            "code": {
               "description": "The error code",
               "title": "Code",
               "type": "string"
            },
            "message_en": {
               "description": "The error message in English.",
               "title": "Message En",
               "type": "string"
            },
            "parameters": {
               "default": [],
               "description": "The list of parameters to construct the message in a different locale.",
               "items": {},
               "title": "Parameters",
               "type": "array"
            }
         },
         "required": [
            "code",
            "message_en"
         ],
         "title": "Error",
         "type": "object"
      },
      "MetricGroup": {
         "enum": [
            "retrieval_quality",
            "answer_quality",
            "content_safety",
            "performance",
            "usage",
            "tool_call_quality",
            "readability"
         ],
         "title": "MetricGroup",
         "type": "string"
      },
      "MetricThreshold": {
         "description": "The class that defines the threshold for a metric.",
         "properties": {
            "type": {
               "description": "Threshold type. One of 'lower_limit', 'upper_limit'",
               "enum": [
                  "lower_limit",
                  "upper_limit"
               ],
               "title": "Type",
               "type": "string"
            },
            "value": {
               "default": 0,
               "description": "The value of metric threshold",
               "title": "Threshold value",
               "type": "number"
            }
         },
         "required": [
            "type"
         ],
         "title": "MetricThreshold",
         "type": "object"
      }
   },
   "required": [
      "name",
      "value",
      "record_id",
      "tool_name"
   ]
}

Config:
  • arbitrary_types_allowed: bool = True

  • use_enum_values: bool = True

Fields:
field execution_count: Annotated[int, FieldInfo(annotation=NoneType, required=False, default=1, title='Execution count', description='The execution count for this tool name.', metadata=[Gt(gt=0)])] = 1

The execution count for this tool name.

Constraints:
  • gt = 0

field tool_name: Annotated[str, FieldInfo(annotation=NoneType, required=True, title='Tool Name', description='Name of the tool for which this result is computed.')] [Required]

Name of the tool for which this result is computed.