Skip to content

Notebooks & Templates

Programmatic notebook generation from RAG pattern definitions.

Notebook Builder

notebook

Classes

Notebook

Notebook(
    kernel_name: str = "python3",
    kernel_display_name: str = "Python 3",
    language: str = "python",
    language_version: str = "3.13.11",
    cells: list[NotebookCell] | None = None,
)

Builder for programmatically creating and manipulating Jupyter notebooks.

Provides a fluent API for building notebooks by adding cells, formatting content with placeholder substitution, and saving to disk.

Parameters:

  • kernel_name (str, default: "python3" ) –

    Kernel name for the notebook.

  • kernel_display_name (str, default: "Python 3" ) –

    Display name for the kernel.

  • language (str, default: "python" ) –

    Programming language.

  • language_version (str, default: "3.13.11" ) –

    Language version.

  • cells (list[NotebookCell] | None, default: None ) –

    Notebook cells to build the notebook from.

Examples:

>>> nb = Notebook(
...     cells=[
...         NotebookCell(
...             cell_type="markdown",
...             source="### Hello world!",
...         )
...     ]
... )
>>> nb.save("output.ipynb")
Source code in ai4rag/components/assets_generator/notebook.py
def __init__(
    self,
    kernel_name: str = "python3",
    kernel_display_name: str = "Python 3",
    language: str = "python",
    language_version: str = "3.13.11",
    cells: list[NotebookCell] | None = None,
):
    """Set up the notebook's cells, metadata and format version.

    Parameters
    ----------
    kernel_name : str, default="python3"
        Stored as ``metadata["kernelspec"]["name"]``.
    kernel_display_name : str, default="Python 3"
        Stored as ``metadata["kernelspec"]["display_name"]``.
    language : str, default="python"
        Stored in both the ``kernelspec`` and ``language_info`` entries.
    language_version : str, default="3.13.11"
        Stored as ``metadata["language_info"]["version"]``.
    cells : list[NotebookCell] | None, default=None
        Initial cells.  A falsy value (``None`` or an empty list) results
        in a new empty list.
    """
    kernelspec = {
        "display_name": kernel_display_name,
        "language": language,
        "name": kernel_name,
    }
    language_info = {"name": language, "version": language_version}

    self.cells: list[NotebookCell] = cells or []
    self.metadata = {"kernelspec": kernelspec, "language_info": language_info}
    # The notebook is written as nbformat 4.4.
    self.nbformat = 4
    self.nbformat_minor = 4
Functions
to_dict
to_dict() -> dict

Convert notebook to dictionary format.

Returns:

  • dict

    Notebook in Jupyter JSON format.

Source code in ai4rag/components/assets_generator/notebook.py
def to_dict(self) -> dict:
    """Serialise the notebook into a plain dictionary.

    Returns
    -------
    dict
        Mapping with the keys ``cells`` (each cell converted through its
        own ``to_dict``), ``metadata``, ``nbformat`` and ``nbformat_minor``.
    """
    serialised_cells = []
    for cell in self.cells:
        serialised_cells.append(cell.to_dict())

    return dict(
        cells=serialised_cells,
        metadata=self.metadata,
        nbformat=self.nbformat,
        nbformat_minor=self.nbformat_minor,
    )
save
save(path: str | Path, indent: int = 2) -> Notebook

Save notebook to a file.

Parameters:

  • path (str | Path) –

    Output file path.

  • indent (int, default: 2 ) –

    JSON indentation level.

Returns:

  • Notebook

    Self for method chaining.

Examples:

>>> nb = Notebook()
>>> nb.save("output.ipynb")
Source code in ai4rag/components/assets_generator/notebook.py
def save(self, path: str | Path, indent: int = 2) -> Notebook:
    """Write the notebook to disk as JSON.

    Missing parent directories of *path* are created before writing.

    Parameters
    ----------
    path : str | Path
        Destination file.
    indent : int, default=2
        Indentation passed on to the JSON serialiser.

    Returns
    -------
    Notebook
        The same instance, so calls can be chained.

    Examples
    --------
    >>> nb = Notebook()
    >>> nb.save("output.ipynb")
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with target.open("w+", encoding="utf-8") as stream:
        payload = self.to_dict()
        json_dump(payload, stream, indent=indent)

    return self
load classmethod
load(notebook_name: str, templates_dir: str | Path | None = None) -> Notebook

Load a Jupyter notebook template from bundled package data or a custom directory.

Parameters:

  • notebook_name (str) –

    Name of the template file (e.g. "ogx_indexing_template.ipynb").

  • templates_dir (str | Path | None, default: None ) –

    Directory containing the template notebooks. When None, templates are loaded from the notebook_templates/ directory bundled with the ai4rag.components.assets_generator package.

Returns:

  • Notebook

    A new Notebook instance populated with the loaded cells and metadata.

Examples:

>>> nb = Notebook.load("ogx_indexing_template.ipynb")
>>> nb = Notebook.load("custom.ipynb", templates_dir="/data/templates")
Source code in ai4rag/components/assets_generator/notebook.py
@classmethod
def load(
    cls,
    notebook_name: str,
    templates_dir: str | Path | None = None,
) -> Notebook:
    """Build a notebook from a template file.

    Parameters
    ----------
    notebook_name : str
        File name of the template (e.g. ``"ogx_indexing_template.ipynb"``).
    templates_dir : str | Path | None, default=None
        Directory that holds the template.  When *None*, the file is read
        from the ``notebook_templates`` resource directory of the
        ``ai4rag.components.assets_generator`` package.

    Returns
    -------
    Notebook
        New instance holding the template's cells.  Its ``metadata``,
        ``nbformat`` and ``nbformat_minor`` are taken from the file
        (``{}``, ``4`` and ``4`` respectively when absent).

    Examples
    --------
    >>> nb = Notebook.load("ogx_indexing_template.ipynb")
    >>> nb = Notebook.load("custom.ipynb", templates_dir="/data/templates")
    """
    if templates_dir is None:
        bundled = importlib.resources.files("ai4rag.components.assets_generator").joinpath(
            "notebook_templates", notebook_name
        )
        with importlib.resources.as_file(bundled) as template_file, template_file.open(
            "r", encoding="utf-8"
        ) as stream:
            raw_notebook = json_load(stream)
    else:
        template_file = Path(templates_dir) / notebook_name
        with template_file.open("r", encoding="utf-8") as stream:
            raw_notebook = json_load(stream)

    cells = []
    for raw_cell in raw_notebook.get("cells", []):
        cell = NotebookCell(
            cell_type=raw_cell.get("cell_type", "code"),
            source=raw_cell.get("source", ""),
            metadata=raw_cell.get("metadata", {}),
        )
        # Only code cells carry an execution counter and outputs.
        if cell.cell_type == "code":
            cell.execution_count = raw_cell.get("execution_count")
            cell.outputs = raw_cell.get("outputs", [])
        cells.append(cell)

    metadata = raw_notebook.get("metadata", {})
    kernelspec = metadata.get("kernelspec", {})
    language_info = metadata.get("language_info", {})

    loaded = cls(
        kernel_name=kernelspec.get("name", "python3"),
        kernel_display_name=kernelspec.get("display_name", "Python 3"),
        language=language_info.get("name", "python"),
        language_version=language_info.get("version", "3.13.11"),
        cells=cells,
    )

    # Replace the constructor-built metadata with the file's own so that
    # entries beyond kernelspec/language_info are kept unchanged.
    loaded.metadata = metadata
    loaded.nbformat = raw_notebook.get("nbformat", 4)
    loaded.nbformat_minor = raw_notebook.get("nbformat_minor", 4)

    return loaded

NotebookCell

NotebookCell(cell_type: Literal['code', 'markdown'], source: str | list[str], metadata: dict | None = None)

Represents a single cell in a Jupyter notebook.

Parameters:

  • cell_type (Literal['code', 'markdown']) –

    The type of cell.

  • source (str | list[str]) –

    The cell content. Can be a string or list of strings.

  • metadata (dict, default: None ) –

    Cell metadata.

Source code in ai4rag/components/assets_generator/notebook.py
def __init__(
    self,
    cell_type: Literal["code", "markdown"],
    source: str | list[str],
    metadata: dict | None = None,
):
    """Store the cell's type, content and metadata.

    Parameters
    ----------
    cell_type : {"code", "markdown"}
        Kind of cell.
    source : str | list[str]
        Cell content, either as one string or as a list of strings.
    metadata : dict | None, default=None
        Cell metadata.  A falsy value (``None`` or an empty dict) results
        in a new empty dict.
    """
    self.cell_type = cell_type
    self.metadata = metadata if metadata else {}
    self.source = source

    if cell_type != "code":
        return

    # Code cells additionally track execution state; it starts out empty.
    self.execution_count = None
    self.outputs: list = []
Functions
to_dict
to_dict() -> dict

Convert cell to notebook JSON format.

Returns:

  • dict

    Cell in Jupyter notebook JSON format.

Source code in ai4rag/components/assets_generator/notebook.py
def to_dict(self) -> dict:
    """Serialise the cell into a plain dictionary.

    Returns
    -------
    dict
        Mapping with ``cell_type``, ``metadata`` and ``source``; code
        cells additionally include ``execution_count`` and ``outputs``.
    """
    payload = dict(
        cell_type=self.cell_type,
        metadata=self.metadata,
        source=self.source,
    )

    if self.cell_type != "code":
        return payload

    payload.update(execution_count=self.execution_count, outputs=self.outputs)
    return payload
format_source
format_source(placeholders_mapping: dict) -> Self

Format cell source by substituting placeholders.

Performs str.format-style substitution on each line of the cell source. Placeholders not present in placeholders_mapping are replaced with empty strings so that missing keys never raise.

Parameters:

  • placeholders_mapping (dict) –

    Mapping from placeholder names to replacement values.

Returns:

  • Self

    This cell instance (mutated in-place) for method chaining.

Source code in ai4rag/components/assets_generator/notebook.py
def format_source(self, placeholders_mapping: dict) -> Self:
    """Format cell source by substituting placeholders.

    Performs ``str.format``-style substitution on the cell source, which
    may be a single string or a list of strings (each element is formatted
    separately).  Placeholders not present in *placeholders_mapping* are
    replaced with empty strings so that missing keys never raise, for both
    source shapes.  Doubled braces (``{{`` / ``}}``) are emitted as literal
    braces, as with ``str.format``.

    Parameters
    ----------
    placeholders_mapping : dict
        Mapping from placeholder names to replacement values.  It is not
        modified.

    Returns
    -------
    Self
        This cell instance (mutated in-place) for method chaining.
    """
    formatter = Formatter()
    # Work on a copy so that the defaults added below never leak into the
    # caller's mapping.
    values = dict(placeholders_mapping)

    def _render(text: str) -> str:
        # Give every placeholder referenced by *text* an empty-string default
        # before formatting, so an unknown name cannot raise KeyError.
        for _, field_name, _, _ in formatter.parse(text):
            if field_name is not None:
                values.setdefault(field_name, "")
        return formatter.vformat(text, (), values)

    if isinstance(self.source, list):
        self.source = [_render(line) for line in self.source]
    else:
        # Previously this branch formatted with the raw mapping and raised
        # KeyError for any placeholder missing from it.
        self.source = _render(self.source)

    return self

Template Rendering

templates

Functions

generate_notebook_from_template

generate_notebook_from_template(
    notebook_template: str,
    output_data: dict[str, Any],
    output_notebook_path: str | Path,
    test_data_key: str = "",
    input_data_key: str = "",
    ogx_base_url: str = "",
) -> None

Generate a filled notebook from a template and pattern configuration.

Loads the named template, substitutes all placeholders with values extracted from output_data, and writes the result to disk.

Parameters:

  • notebook_template (str) –

    Template base name without the _template.ipynb suffix (e.g. "ogx_inference" or "ogx_indexing").

  • output_data (dict[str, Any]) –

    The parsed pattern.json data.

  • output_notebook_path (str | Path) –

    Path where the generated notebook is saved.

  • test_data_key (str, default: "" ) –

    S3 key of the test data file used as input to AI4RAG.

  • input_data_key (str, default: "" ) –

    S3 key of the documents directory used as input to AI4RAG.

  • ogx_base_url (str, default: "" ) –

    Base URL for the OGX API.

Source code in ai4rag/components/assets_generator/templates.py
def generate_notebook_from_template(
    notebook_template: str,
    output_data: dict[str, Any],
    output_notebook_path: str | Path,
    test_data_key: str = "",
    input_data_key: str = "",
    ogx_base_url: str = "",
) -> None:
    """Render a notebook template with pattern values and save it.

    The placeholder mapping is built from *output_data* and the keyword
    values, the template ``<notebook_template>_template.ipynb`` is loaded,
    every cell is formatted with that mapping, and a notebook made of the
    formatted cells is written to *output_notebook_path*.

    Parameters
    ----------
    notebook_template : str
        Template base name without the ``_template.ipynb`` suffix
        (e.g. ``"ogx_inference"`` or ``"ogx_indexing"``).
    output_data : dict[str, Any]
        The parsed ``pattern.json`` data.
    output_notebook_path : str | Path
        Path where the generated notebook is saved.
    test_data_key : str, default=""
        S3 key of the test data file used as input to AI4RAG.
    input_data_key : str, default=""
        S3 key of the documents directory used as input to AI4RAG.
    ogx_base_url : str, default=""
        Base URL for the OGX API.
    """
    mapping = create_placeholder_mapping(
        output_data,
        test_data_key=test_data_key,
        input_data_key=input_data_key,
        ogx_base_url=ogx_base_url,
    )
    template = Notebook.load(notebook_name=f"{notebook_template}_template.ipynb")

    rendered_cells = []
    for cell in template.cells:
        rendered_cells.append(cell.format_source(mapping))

    # NOTE(review): the saved notebook is a fresh ``Notebook`` built from the
    # cells only, so the loaded template's metadata/nbformat values are not
    # passed on and the constructor defaults are used — confirm this is intended.
    Notebook(cells=rendered_cells).save(Path(output_notebook_path))

create_placeholder_mapping

create_placeholder_mapping(
    output_data: dict[str, Any], test_data_key: str = "", input_data_key: str = "", ogx_base_url: str = ""
) -> dict[str, Any]

Create a mapping from placeholder names to their values from a pattern definition.

Extracts values from the pattern.json structure produced by the optimisation pipeline and returns a flat dictionary suitable for NotebookCell.format_source().

Parameters:

  • output_data (dict[str, Any]) –

    The parsed pattern.json data.

  • test_data_key (str, default: "" ) –

    S3 key of the test data file used as input to AI4RAG.

  • input_data_key (str, default: "" ) –

    S3 key of the documents directory used as input to AI4RAG.

  • ogx_base_url (str, default: "" ) –

    Base URL for the OGX API. Falls back to an empty string when not provided, allowing the generated notebook to prompt users for the URL.

Returns:

  • dict[str, Any]

    Dictionary mapping placeholder names to their values.

Source code in ai4rag/components/assets_generator/templates.py
def create_placeholder_mapping(
    output_data: dict[str, Any],
    test_data_key: str = "",
    input_data_key: str = "",
    ogx_base_url: str = "",
) -> dict[str, Any]:
    """Flatten a pattern definition into placeholder values.

    Reads the ``name`` entry and the ``generation``, ``embedding``,
    ``vector_store_binding``, ``retrieval`` and ``chunking`` sections under
    ``settings`` of *output_data* and returns a flat dictionary suitable
    for ``NotebookCell.format_source()``.  Absent sections or entries fall
    back to the defaults visible in the body below.

    Parameters
    ----------
    output_data : dict[str, Any]
        The parsed ``pattern.json`` data.
    test_data_key : str, default=""
        S3 key of the test data file used as input to AI4RAG.
    input_data_key : str, default=""
        S3 key of the documents directory used as input to AI4RAG.
    ogx_base_url : str, default=""
        Base URL for the OGX API.  Surrounding whitespace is stripped; a
        falsy value yields an empty string.

    Returns
    -------
    dict[str, Any]
        Dictionary mapping placeholder names to their values.
    """
    settings = output_data.get("settings", {})
    generation = settings.get("generation", {})
    embedding = settings.get("embedding", {})
    vector_store = settings.get("vector_store_binding", {})
    retrieval = settings.get("retrieval", {})
    chunking = settings.get("chunking", {})

    return {
        "AI4RAG_VERSION": __version__,
        "PATTERN_NAME": output_data.get("name", ""),
        # Generation (foundation model) settings.
        "FM_MODEL_ID": generation.get("model_id", ""),
        "SYSTEM_MESSAGE": generation.get("system_message_text", ""),
        "USER_MESSAGE": generation.get("user_message_text", ""),
        "CONTEXT_TEXT": generation.get("context_template_text", ""),
        # Embedding settings.
        "EMBEDDING_MODEL_ID": embedding.get("model_id", ""),
        "EMBEDDING_PARAMS": embedding.get("embedding_params", {"embedding_dimension": 768}),
        # Vector store binding.
        "PROVIDER_ID": vector_store.get("provider_id", ""),
        "COLLECTION_NAME": vector_store.get("vector_store_id", ""),
        # Retrieval settings; the search/ranker entries are ``None`` when absent.
        "RETRIEVAL_METHOD": retrieval.get("method", ""),
        "NUMBER_OF_CHUNKS": retrieval.get("number_of_chunks", 5),
        "SEARCH_MODE": retrieval.get("search_mode"),
        "RANKER_STRATEGY": retrieval.get("ranker_strategy"),
        "RANKER_K": retrieval.get("ranker_k"),
        "RANKER_ALPHA": retrieval.get("ranker_alpha"),
        # Chunking settings.
        "CHUNKING_METHOD": chunking.get("method", ""),
        "CHUNK_SIZE": chunking.get("chunk_size", 512),
        "CHUNK_OVERLAP": chunking.get("chunk_overlap", 50),
        # Values supplied by the caller rather than the pattern.
        "TEST_DATA_KEY": test_data_key,
        "INPUT_DATA_KEY": input_data_key,
        "OGX_CLIENT_BASE_URL": (ogx_base_url or "").strip(),
    }

Pattern Builder

pattern_builder

Functions

build_pattern_json

build_pattern_json(pattern: dict, detected_language: dict | None = None) -> dict

Update pattern information with detected language and responses template.

Parameters:

  • pattern (dict) –

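    A single evaluation result object carrying indexing_params, rag_params, pattern_name, collection, etc. It must contain a settings mapping with generation, vector_store_binding and retrieval entries, and it is modified in place.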

  • detected_language (dict | None, default: None ) –

    Language detection result ({"code": "...", "name": "..."}).

Returns:

  • dict

    Pattern definition suitable for JSON serialisation.

Source code in ai4rag/components/assets_generator/pattern_builder.py
def build_pattern_json(
    pattern: dict,
    detected_language: dict | None = None,
) -> dict:
    """Add the detected language and a responses template to a pattern.

    The *pattern* dictionary is modified in place and also returned.

    Parameters
    ----------
    pattern : dict
        Pattern data.  Must provide ``settings`` with the entries
        ``generation`` (``model_id``, ``system_message_text``),
        ``vector_store_binding`` (``vector_store_id``) and ``retrieval``
        (``number_of_chunks``); a missing entry raises ``KeyError``.
    detected_language : dict | None, default=None
        Language detection result (``{"code": "...", "name": "..."}``).
        When truthy it is stored under
        ``settings["generation"]["detected_language"]``.

    Returns
    -------
    dict
        The same *pattern* object, now containing
        ``settings["responses_template"]``.
    """
    settings = pattern["settings"]
    generation = settings["generation"]

    if detected_language:
        generation["detected_language"] = detected_language

    model_id = generation["model_id"]
    instructions = generation["system_message_text"]
    vector_store_id = settings["vector_store_binding"]["vector_store_id"]
    retrieval = settings["retrieval"]
    ranking_options = {"max_num_results": retrieval["number_of_chunks"]}

    settings["responses_template"] = {
        "model": model_id,
        "stream": False,
        "store": False,
        "input": "<user_query_placeholder>",
        "instructions": instructions,
        "tools": [
            {
                "type": "file_search",
                "vector_store_ids": [vector_store_id],
                "ranking_options": ranking_options,
            },
        ],
        "include": ["file_search_call.results"],
    }

    search_mode = retrieval.get("search_mode")
    ranker_strategy = retrieval.get("ranker_strategy")
    ranker_k = retrieval.get("ranker_k")
    ranker_alpha = retrieval.get("ranker_alpha")

    # Ranker options are only added for hybrid search: a positive
    # ``ranker_k`` for "rrf", or a ``ranker_alpha`` other than 1 for "weighted".
    if search_mode == "hybrid":
        if ranker_strategy == "rrf" and ranker_k is not None and ranker_k > 0:
            ranking_options["impact_factor"] = ranker_k
        elif ranker_strategy == "weighted" and ranker_alpha is not None and ranker_alpha != 1:
            ranking_options["alpha"] = ranker_alpha

    return pattern