Source code for labelbox.schema.embedding

from typing import Optional, Callable, Dict, Any, List

from labelbox.adv_client import AdvClient
from labelbox.pydantic_compat import BaseModel, PrivateAttr


class EmbeddingVector(BaseModel):
    """
    A Vector Embedding for Custom Embedding.

    Attributes:
        embedding_id (str): The ID of the associated Embedding
        vector (list): The raw vector values - the number of entries should match the Embedding's dimensions
        clusters (list): The cluster groupings (optional; defaults to None)
    """
    embedding_id: str
    vector: List[float]
    # Explicit default so the field stays optional regardless of how the
    # underlying pydantic version treats a bare ``Optional[...]`` annotation.
    clusters: Optional[List[int]] = None

    def to_gql(self) -> Dict[str, Any]:
        """
        Build the dictionary representation of this vector using the
        camelCase keys ``embeddingId``, ``vector`` and (optionally) ``clusters``.

        Returns:
            A dict with ``embeddingId`` and ``vector``. ``clusters`` is only
            included when it is set and non-empty.
        """
        payload: Dict[str, Any] = {
            "embeddingId": self.embedding_id,
            "vector": self.vector,
        }
        # Truthiness check on purpose: both None and an empty list are omitted.
        if self.clusters:
            payload["clusters"] = self.clusters
        return payload


class Embedding(BaseModel):
    """
    An Embedding is used to power similarity search in Catalog.

    This model supports the representation of both `Precomputed` embeddings
    that Labelbox provides, and `Custom` embeddings which can be imported
    directly into Labelbox.

    Attributes:
        id (str): The ID of the embedding
        name (str): The name of the embedding
        dims (int): Refers to the size of the vector space in which words, phrases, or other entities are embedded
        custom (bool): Indicates whether the embedding is a Precomputed embedding or a Custom embedding
    """
    id: str
    name: str
    custom: bool
    dims: int
    # Client used for every remote call below; stored as a private attribute
    # so it is not treated as a model field.
    _client: AdvClient = PrivateAttr()

    def __init__(self, client: AdvClient, **data):
        """
        Create an Embedding bound to a client.

        Args:
            client: The AdvClient that the instance methods delegate to.
            **data: Field values for the model (id, name, custom, dims).
        """
        # The model must be initialised before the private attribute is set.
        super().__init__(**data)
        self._client = client

    def delete(self) -> None:
        """
        Delete a custom embedding.

        If the embedding does not exist or cannot be deleted,
        an AdvLibException is raised.
        """
        self._client.delete_embedding(self.id)

    def import_vectors_from_file(
            self,
            path: str,
            callback: Optional[Callable[[Dict[str, Any]], None]] = None
    ) -> None:
        """
        Import vectors into a given embedding from an NDJSON file.

        An NDJSON file consists of newline delimited JSON. Each line of the
        file is valid JSON, but the entire file itself is NOT. The format of
        the file looks like:

            {"id": DATAROW ID1, "vector": [ array of floats ]}
            {"id": DATAROW ID2, "vector": [ array of floats ]}
            {"id": DATAROW ID3, "vector": [ array of floats ]}

        The vectors are added to the system in an async manner and it may
        take up to a couple minutes before they are usable via similarity
        search. Note that you also need to upload at least 1000 vectors in
        order for similarity search to be activated.

        Args:
            path: The path to the NDJSON file.
            callback: a callback function used to get the status of each
                batch of lines uploaded.
        """
        self._client.import_vectors_from_file(self.id, path, callback)

    def get_imported_vector_count(self) -> int:
        """
        Return the # of vectors actually imported into Labelbox.

        This will give you an accurate count of the number of vectors
        written into the vector search system.

        Returns:
            The number of imported vectors.
        """
        return self._client.get_imported_vector_count(self.id)

    @classmethod
    def type_name(cls):
        """
        Return the last dot-separated component of the class's ``__name__``.
        """
        return cls.__name__.split(".")[-1]