Skip to content

Vector Stores API

Milvus (Llama Stack)

llama_stack

Classes

LSVectorStore

LSVectorStore(
    embedding_model: LSEmbeddingModel,
    client: LlamaStackClient,
    provider_id: str,
    reuse_collection_name: str | None = None,
    distance_metric: str | None = None,
)

Bases: BaseVectorStore

LlamaStack client wrapper used for communication with a vector store (single index/collection).

Source code in ai4rag/rag/vector_store/llama_stack.py
def __init__(
    self,
    embedding_model: LSEmbeddingModel,
    client: LlamaStackClient,
    provider_id: str,
    reuse_collection_name: str | None = None,
    distance_metric: str | None = None,
):
    """Wrap a single Llama Stack vector store (index/collection).

    Parameters
    ----------
    embedding_model : LSEmbeddingModel
        Model used to embed documents and queries.
    client : LlamaStackClient
        Client used for all communication with the Llama Stack server.
    provider_id : str
        Identifier of the vector-io provider backing the store.
    reuse_collection_name : str | None, default=None
        Name of an existing collection to reuse instead of creating a new one.
    distance_metric : str | None, default=None
        Metric used to score vector similarity.
    """
    super().__init__(embedding_model, distance_metric, reuse_collection_name)
    self.client = client
    ls_store = self._initialize_ls_vector_store(
        client=client,
        embedding_model=embedding_model,
        provider_id=provider_id,
        reuse_collection_name=reuse_collection_name,
    )
    self._ls_vs = ls_store
    # Cache the identifier assigned to the backing collection.
    self._collection_name = ls_store.id
Functions
search
search(
    query: str,
    k: int,
    include_scores: bool = False,
    search_mode: str = "vector",
    ranker_strategy: str | None = None,
    ranker_k: int | None = None,
    ranker_alpha: float | None = None,
    **kwargs
) -> list[Document] | list[tuple[Document, float]]

Search for the chunks relevant to the query.

Parameters:

  • query (str) –

    Question / query for which the similarity search will be executed.

  • k (int) –

    Number of chunks to be returned as a result of similarity search

  • include_scores (bool, default: False ) –

    If True, similarity scores will be returned in the response

  • search_mode (str, default: "vector" ) –

    Search mode: "vector" or "hybrid".

  • ranker_strategy (str | None, default: None ) –

    Ranking strategy for hybrid search: "rrf", "weighted", or "normalized". Empty string means no ranker (used for non-hybrid modes).

  • ranker_k (int | None, default: None ) –

    Parameter k for the ranking function. 0 means not set.

  • ranker_alpha (float | None, default: None ) –

    Alpha parameter for the weighted ranking strategy. 1 means not set (vector-only sentinel).

Returns:

  • list[Document] | list[tuple[Document, float]]

    List of chunks as Document instances with or without scores, depending on the input.

Source code in ai4rag/rag/vector_store/llama_stack.py
def search(
    self,
    query: str,
    k: int,
    include_scores: bool = False,
    search_mode: str = "vector",
    ranker_strategy: str | None = None,
    ranker_k: int | None = None,
    ranker_alpha: float | None = None,
    **kwargs,
) -> list[Document] | list[tuple[Document, float]]:
    """
    Search for the chunks relevant to the query.

    Parameters
    ----------
    query : str
        Question / query for which the similarity search will be executed.

    k : int
        Number of chunks to be returned as a result of the similarity search.

    include_scores : bool, default=False
        If True, similarity scores will be returned in the response.

    search_mode : str, default="vector"
        Search mode: "vector" or "hybrid".

    ranker_strategy : str | None, default=None
        Ranking strategy for hybrid search: "rrf", "weighted", or "normalized".
        Only applied when ``search_mode`` is "hybrid".

    ranker_k : int | None, default=None
        Parameter k for the ranking function; values <= 0 (or None) are
        treated as not set.

    ranker_alpha : float | None, default=None
        Alpha parameter for the "weighted" ranking strategy; 1 is treated as
        not set (vector-only sentinel).

    Returns
    -------
    list[Document] | list[tuple[Document, float]]
        Retrieved chunks as Document instances, paired with scores when
        ``include_scores`` is True.
    """
    self._validate_search_params(search_mode, ranker_strategy, ranker_k, ranker_alpha)

    params = {"max_chunks": k, "mode": search_mode}
    if search_mode == "hybrid" and ranker_strategy:
        # Only forward ranker settings that are explicitly set (sentinels
        # 0/None for k and 1/None for alpha mean "use the server default").
        ranker_params = {}
        if ranker_strategy == "rrf" and ranker_k is not None and ranker_k > 0:
            ranker_params["impact_factor"] = ranker_k
        if ranker_strategy == "weighted" and ranker_alpha is not None and ranker_alpha != 1:
            ranker_params["alpha"] = ranker_alpha
        params["reranker_type"] = ranker_strategy
        params["reranker_params"] = ranker_params

    response = self.client.vector_io.query(query=query, vector_store_id=self._ls_vs.id, params=params)

    def _to_document(chunk):
        # Convert a Llama Stack chunk into the project's Document type.
        return Document(page_content=chunk.content, metadata=chunk.chunk_metadata.to_dict())

    if not include_scores:
        return [_to_document(chunk) for chunk in response.chunks]
    return [(_to_document(chunk), score) for chunk, score in zip(response.chunks, response.scores)]
add_documents
add_documents(documents: list[Document]) -> None

Add documents to the collection.

Parameters:

  • documents (list[Document]) –

    Documents to add to the collection.

Source code in ai4rag/rag/vector_store/llama_stack.py
def add_documents(self, documents: list[Document]) -> None:
    """
    Embed the given documents and insert them into the collection.

    Parameters
    ----------
    documents : list[Document]
        Documents to add to the collection. Each document's metadata must
        contain a "document_id" entry, which is used as the chunk ID.
    """
    model = self.embedding_model
    # ``params`` may be a plain dict (backward compatibility) or an
    # LSEmbeddingParams instance.
    params = model.params
    dimension = params["embedding_dimension"] if isinstance(params, dict) else params.embedding_dimension

    texts = [doc.page_content for doc in documents]
    vectors = model.embed_documents(texts)

    payload = [
        {
            "content": doc.page_content,
            "chunk_metadata": doc.metadata,
            "chunk_id": doc.metadata["document_id"],
            "embedding_model": model.model_id,
            "embedding_dimension": dimension,
            "embedding": vector,
        }
        for doc, vector in zip(documents, vectors)
    ]
    self.client.vector_io.insert(
        vector_store_id=self._ls_vs.id,
        chunks=payload,
    )
clean_collection
clean_collection()

Remove content of the collection and remove vector store instance.

Source code in ai4rag/rag/vector_store/llama_stack.py
def clean_collection(self):
    """Delete the underlying vector store instance, discarding all stored content."""
    store_id = self._ls_vs.id
    self.client.vector_stores.delete(store_id)

ChromaDB

chroma

Classes

ChromaVectorStore

ChromaVectorStore(
    embedding_model: BaseEmbeddingModel,
    reuse_collection_name: str | None = None,
    distance_metric: str = "cosine",
    document_name_field: str = "document_id",
    chunk_sequence_number_field: str = "sequence_number",
    **kwargs
)

Bases: BaseVectorStore

Class representing single index in the chroma vector database.

Parameters:

  • embedding_model (BaseEmbeddingModel) –

    Instance used for embedding documents and user's queries.

  • reuse_collection_name (str, default: None ) –

    Name of the collection that will be created as a vector store.

  • distance_metric (str, default: "cosine" ) –

    Metric that will be used to calculate similarity score between vectors.

  • document_name_field (str, default: "document_id" ) –

    Default document ID field name.

  • chunk_sequence_number_field (str, default: "sequence_number" ) –

    Default chunk sequence number field name.

Source code in ai4rag/rag/vector_store/chroma.py
def __init__(
    self,
    embedding_model: BaseEmbeddingModel,
    reuse_collection_name: str | None = None,
    distance_metric: str = "cosine",
    document_name_field: str = "document_id",
    chunk_sequence_number_field: str = "sequence_number",
    **kwargs,
) -> None:
    """Initialize a single index in the Chroma vector database.

    Parameters
    ----------
    embedding_model : BaseEmbeddingModel
        Instance used for embedding documents and user's queries.
    reuse_collection_name : str | None, default=None
        Name of an existing collection to reuse as the vector store.
    distance_metric : str, default="cosine"
        Metric used to calculate the similarity score between vectors.
    document_name_field : str, default="document_id"
        Metadata field holding the document ID.
    chunk_sequence_number_field : str, default="sequence_number"
        Metadata field holding the chunk sequence number.
    **kwargs
        Extra options forwarded to the Chroma client; ``collection_name`` is
        consumed here when no ``reuse_collection_name`` is given.
    """
    super().__init__(
        embedding_model=embedding_model,
        distance_metric=distance_metric,
        reuse_collection_name=reuse_collection_name,
    )
    self._document_name_field = document_name_field
    self._chunk_sequence_number_field = chunk_sequence_number_field
    # Single quotes inside the f-string keep this parseable on Python < 3.12
    # (reusing the outer quote requires PEP 701, which landed in 3.12).
    self._collection_name = reuse_collection_name or kwargs.pop(
        "collection_name", f"ai4rag_{datetime.now().strftime('%Y%m%d%H%M%S')}"
    )
    self._vector_store = self._get_chroma_client(**kwargs)
Attributes
distance_metric property writable
distance_metric: str

Get used distance metric.

collection_name property
collection_name: str

Dynamically get collection name.

Functions
clear
clear() -> None

Clear the vector store.

Source code in ai4rag/rag/vector_store/chroma.py
def clear(self) -> None:
    """Remove every stored entry from the vector store."""
    stored_ids = self._vector_store.get()["ids"]
    # Nothing to delete when the store is already empty.
    if stored_ids:
        self.delete(stored_ids)
count
count() -> int

Count the number of shards in the vector store.

Returns:

  • int

    Number of shards in the vector store.

Source code in ai4rag/rag/vector_store/chroma.py
def count(self) -> int:
    """Count the number of shards (stored entries) in the vector store.

    Returns
    -------
    int
        Number of shards in the vector store.
    """
    record_ids = self._vector_store.get()["ids"]
    return len(record_ids)
add_documents
add_documents(documents: list, **kwargs: Any) -> list[str]

Embed and add documents to the vector store.

Parameters:

  • documents (list) –

    Documents to be embedded and added to the vector store.

Returns:

  • list[str]

    List of documents IDs.

Source code in ai4rag/rag/vector_store/chroma.py
def add_documents(self, documents: list, **kwargs: Any) -> list[str]:
    """
    Embed and add documents to the vector store.

    Documents are inserted in batches no larger than the client's maximum
    batch size (or an explicit ``max_batch_size`` keyword argument).

    Parameters
    ----------
    documents : list
        Documents to be embedded and added to the vector store.

    Returns
    -------
    list[str]
        List of documents IDs.
    """
    # ``max_batch_size`` is consumed here; pop it so it is NOT forwarded to
    # the underlying LangChain ``add_documents`` call (which does not take it).
    max_batch_size = kwargs.pop("max_batch_size", None)
    if max_batch_size is None:
        try:
            max_batch_size = self._vector_store._client.get_max_batch_size()  # pylint: disable=protected-access
        except AttributeError:
            # Older chromadb clients do not expose the limit; fall back to a
            # conservative default.
            max_batch_size = 10_000

    ids, docs = self._process_documents(documents)
    if len(docs) <= max_batch_size:
        return self._vector_store.add_documents(docs, ids=ids, **kwargs)

    batch_ids: list[str] = []
    for batch_start in range(0, len(docs), max_batch_size):
        batch_end = batch_start + max_batch_size
        batch_ids.extend(
            self._vector_store.add_documents(
                docs[batch_start:batch_end],
                ids=ids[batch_start:batch_end],
                **kwargs,
            )
        )
    return batch_ids
search
search(
    query: str, k: int = 5, include_scores: bool = False, **kwargs: Any
) -> list[Document] | list[tuple[Document, float]]

Searches for documents most similar to the query.

The method is designed as a wrapper for respective LangChain VectorStores' similarity search methods. Therefore, additional search parameters passed in kwargs should be consistent with those methods, and can be found in the LangChain documentation.

Parameters:

  • query (str) –

    Query for which grounding documents will be searched for.

  • k (int, default: 5 ) –

    Number of documents to retrieve

  • include_scores (bool, default: False ) –

    Whether similarity scores of found documents should be returned.

Returns:

  • list[Document] | list[tuple[Document, float]]

    Found documents with or without scores.

Source code in ai4rag/rag/vector_store/chroma.py
def search(
    self,
    query: str,
    k: int = 5,
    include_scores: bool = False,
    **kwargs: Any,
) -> list[Document] | list[tuple[Document, float]]:
    """Search for the documents most similar to the query.

    Thin wrapper around the underlying LangChain vector store's similarity
    search methods; additional ``kwargs`` must be valid for those methods
    (see the LangChain documentation). Keyword arguments listed in
    ``self._HYBRID_KWARGS`` are stripped before forwarding.

    Parameters
    ----------
    query : str
        Query for which grounding documents will be searched for.

    k : int, default=5
        Number of documents to retrieve.

    include_scores : bool, default=False
        Whether similarity scores of found documents should be returned.

    Returns
    -------
    list[Document] | list[tuple[Document, float]]
        Found documents, paired with scores when ``include_scores`` is True.
    """
    passthrough = {name: value for name, value in kwargs.items() if name not in self._HYBRID_KWARGS}
    if include_scores:
        return self._vector_store.similarity_search_with_score(query, k=k, **passthrough)
    return self._vector_store.similarity_search(query, k=k, **passthrough)
window_search(query: str, k: int = 5, include_scores: bool = False, window_size: int = 2, **kwargs: Any) -> list

Searches for documents most similar to the query and extend a document (a chunk) to its adjacent chunks (if they exist) from the same origin document.

The method is designed as a wrapper for respective LangChain VectorStores' similarity search methods. Therefore, additional search parameters passed in kwargs should be consistent with those methods, and can be found in the LangChain documentation.

Parameters:

  • query (str) –

    Query for which grounding documents will be searched for.

  • k (int, default: 5 ) –

    Number of documents to retrieve

  • include_scores (bool, default: False ) –

    Whether similarity scores of found documents should be returned.

  • window_size (int, default: 2 ) –

    Number of chunks from right and left side of the original chunk.

Returns:

  • list

    Found documents with or without scores.

Source code in ai4rag/rag/vector_store/chroma.py
def window_search(
    self,
    query: str,
    k: int = 5,
    include_scores: bool = False,
    window_size: int = 2,
    **kwargs: Any,
) -> list:
    """Run a similarity search and widen each hit with its adjacent chunks.

    Each retrieved chunk is extended with up to ``window_size`` neighbouring
    chunks on either side (when they exist) from the same origin document,
    via ``self._window_extend_and_merge``.

    Additional ``kwargs`` are forwarded to :meth:`search` and must be valid
    for the underlying LangChain similarity search methods (see the
    LangChain documentation).

    Parameters
    ----------
    query : str
        Query for which grounding documents will be searched for.

    k : int, default=5
        Number of documents to retrieve.

    include_scores : bool, default=False
        Whether similarity scores of found documents should be returned.

    window_size : int, default=2
        Number of chunks taken from each side of the original chunk.

    Returns
    -------
    list
        Found (extended) documents, with scores when ``include_scores`` is True.
    """
    hits = self.search(query, k, include_scores, **kwargs)
    if window_size <= 0:
        # Nothing to extend; return the raw search result unchanged.
        return hits

    if include_scores:
        scored_hits = cast(list[tuple[Document, float]], hits)
        return [
            (self._window_extend_and_merge(document, window_size), score)
            for document, score in scored_hits
        ]

    plain_hits = cast(list[Document], hits)
    return [self._window_extend_and_merge(document, window_size) for document in plain_hits]
delete
delete(ids: list[str], **kwargs: Any) -> None

Delete by vector ID or other criteria. For more details see LangChain documentation https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html#langchain_core.vectorstores.base.VectorStore

Source code in ai4rag/rag/vector_store/chroma.py
def delete(self, ids: list[str], **kwargs: Any) -> None:
    """Delete stored entries by vector ID or other criteria.

    For more details see the LangChain documentation:
    https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html#langchain_core.vectorstores.base.VectorStore

    Parameters
    ----------
    ids : list[str]
        IDs of the vectors to delete.
    **kwargs
        Additional deletion criteria forwarded to the underlying store.
    """
    self._vector_store.delete(ids, **kwargs)

Functions