# Source code for labelbox.schema.issue_position

"""Position models for issues, varying by media type.

Each position model serializes to a GeoJSON-compatible dict for the
``position: Json`` GraphQL field.
"""

import json
import logging
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, field_validator

from labelbox.schema.media_type import MediaType

logger = logging.getLogger(__name__)


class ImageIssuePosition(BaseModel):
    """Location of an issue pin on an image asset.

    Attributes:
        x: Horizontal pixel coordinate.
        y: Vertical pixel coordinate.
    """

    x: int
    y: int

    def to_dict(self) -> dict:
        """Return the pin as a GeoJSON-style ``Point`` dict."""
        point = [self.x, self.y]
        return dict(type="Point", coordinates=point)


class PdfIssuePosition(BaseModel):
    """Location of an issue pin on one page of a PDF.

    Both coordinates are fractions (0.0 – 1.0) of the page dimensions
    rather than pixels; the serialized form tags them with the
    ``PERCENT`` unit.

    Attributes:
        x: Horizontal position as a fraction of page width (0.0 – 1.0).
        y: Vertical position as a fraction of page height (0.0 – 1.0).
        page: Zero-based page index.
    """

    x: float
    y: float
    page: int

    @field_validator("x", "y")
    @classmethod
    def _check_range(cls, v: float) -> float:
        """Reject coordinates that fall outside the closed range [0.0, 1.0]."""
        if 0.0 <= v <= 1.0:
            return v
        raise ValueError(
            "PDF coordinates must be between 0.0 and 1.0 (percentage). "
            f"Got {v}."
        )

    def to_dict(self) -> dict:
        """Return the pin as a ``Point`` dict carrying page and unit."""
        payload: dict = {"type": "Point"}
        payload["coordinates"] = [self.x, self.y]
        payload["page"] = self.page
        payload["unit"] = "PERCENT"
        return payload


class TextIssuePosition(BaseModel):
    """A span of characters inside a single text block.

    Attributes:
        text_block_id: Identifier of the text block.
        start_char_index: Start character index (inclusive).
        end_char_index: End character index (exclusive).
    """

    text_block_id: str
    start_char_index: int
    end_char_index: int

    def to_dict(self) -> dict:
        """Return the span using the camelCase keys of the wire format."""
        return dict(
            textBlockId=self.text_block_id,
            startCharIndex=self.start_char_index,
            endCharIndex=self.end_char_index,
        )


class VideoFrameRange(BaseModel):
    """One contiguous span of video frames, optionally with a moving pin.

    A single frame is expressed by setting ``start == end``; in that case
    ``end_x`` / ``end_y`` are ignored during serialization.

    Attributes:
        start: Start frame number.
        end: End frame number (equal to *start* for a single frame).
        x: Horizontal pixel coordinate at *start*.
        y: Vertical pixel coordinate at *start*.
        end_x: Horizontal pixel coordinate at *end* (moving pin). Ignored
            when ``start == end``.
        end_y: Vertical pixel coordinate at *end* (moving pin). Ignored
            when ``start == end``.
    """

    # Frame span covered by this range.
    start: int
    end: int

    # Pin location at the first frame of the span.
    x: int
    y: int

    # Pin location at the last frame of the span; ``None`` means unset.
    end_x: Optional[int] = None
    end_y: Optional[int] = None


class VideoIssuePosition(BaseModel):
    """Issue pin(s) placed on a video asset.

    Covers single frames, contiguous ranges, and several separate ranges,
    each of which may carry a moving pin.

    Attributes:
        frames: One or more :class:`VideoFrameRange` entries.
    """

    frames: List[VideoFrameRange]

    def to_dict(self) -> dict:
        """Serialize to KeyframesGeoJSONPoint format."""

        def keyframe(frame: int, px: int, py: int) -> dict:
            # One keyframe entry: a frame number plus a GeoJSON-style Point.
            return {
                "frame": frame,
                "value": {"type": "Point", "coordinates": [px, py]},
            }

        keyframes: list = []
        for span in self.frames:
            keyframes.append(keyframe(span.start, span.x, span.y))

            # A single-frame span is fully described by its start keyframe.
            if span.end == span.start:
                continue

            # Multi-frame span: the closing keyframe falls back to the
            # starting coordinates for any end coordinate left unset.
            last_x = span.x if span.end_x is None else span.end_x
            last_y = span.y if span.end_y is None else span.end_y
            keyframes.append(keyframe(span.end, last_x, last_y))

        return {"type": "KeyframesGeoJSONPoint", "keyframes": keyframes}


# Union of the four position models defined above; it is the (optional)
# return annotation of ``_deserialize_position``.
IssuePosition = Union[
    ImageIssuePosition,
    PdfIssuePosition,
    TextIssuePosition,
    VideoIssuePosition,
]

# Lookup from a media type to the position model class associated with it.
# ``MediaType.Document`` and ``MediaType.Pdf`` both resolve to
# ``PdfIssuePosition``.
MEDIA_TYPE_POSITION_MAP: Dict[MediaType, type] = {
    MediaType.Image: ImageIssuePosition,
    MediaType.Video: VideoIssuePosition,
    MediaType.Text: TextIssuePosition,
    MediaType.Document: PdfIssuePosition,
    MediaType.Pdf: PdfIssuePosition,
}


def _parse_video_frames(kf_list: Any) -> List[VideoFrameRange]:
    """Rebuild frame ranges from a flat ``keyframes`` list.

    A keyframe immediately followed by one with a *different* frame number
    is read as the start/end pair of a range; any other keyframe becomes a
    single-frame range.

    Note that ``VideoIssuePosition.to_dict`` emits only one keyframe for a
    single-frame range, so two adjacent single-frame entries on different
    frames cannot be told apart from one range in this format and are read
    back as a range.

    Malformed entries raise ``KeyError`` / ``IndexError`` / ``TypeError`` /
    ``ValueError`` / ``OverflowError``; the caller is expected to handle
    them.
    """
    frames: List[VideoFrameRange] = []
    count = len(kf_list)
    i = 0
    while i < count:
        kf = kf_list[i]
        start_frame = kf["frame"]
        start_coords = kf["value"]["coordinates"]

        # Look ahead for an end keyframe.
        if i + 1 < count:
            next_kf = kf_list[i + 1]
            next_coords = next_kf["value"]["coordinates"]
            end_frame = next_kf["frame"]
            if end_frame != start_frame:
                frames.append(
                    VideoFrameRange(
                        start=start_frame,
                        end=end_frame,
                        x=int(start_coords[0]),
                        y=int(start_coords[1]),
                        end_x=int(next_coords[0]),
                        end_y=int(next_coords[1]),
                    )
                )
                # Both keyframes of the pair have been consumed.
                i += 2
                continue

        # Single frame or last entry.
        frames.append(
            VideoFrameRange(
                start=start_frame,
                end=start_frame,
                x=int(start_coords[0]),
                y=int(start_coords[1]),
            )
        )
        i += 1
    return frames


def _deserialize_position(
    raw: Optional[Union[str, dict]],
) -> Optional[IssuePosition]:
    """Convert a raw position value from GraphQL into a typed model.

    The media type is inferred from the payload shape, checked in this
    order: a ``"page"`` key (PDF), a ``"textBlockId"`` key (text),
    ``type == "KeyframesGeoJSONPoint"`` (video), ``type == "Point"``
    (image).

    Args:
        raw: ``None``, a JSON-encoded string, or an already-decoded dict.

    Returns:
        The matching position model, or ``None`` when *raw* is ``None`` /
        JSON ``null``. ``None`` is also returned, with a logged warning,
        when the value cannot be decoded, is not a JSON object, has an
        unrecognized structure, or carries malformed fields. This function
        does not raise for bad payloads, which keeps it forward-compatible
        with new media types.
    """
    if raw is None:
        return None

    data: Any  # json.loads may return any JSON type, not just a dict
    if isinstance(raw, str):
        try:
            data = json.loads(raw)
        except (json.JSONDecodeError, TypeError) as exc:
            logger.warning(
                "Failed to decode issue position JSON: %s (%s)", raw, exc
            )
            return None
        if data is None:
            # JSON ``null`` is a legitimate "no position" value.
            return None
    else:
        data = raw

    if not isinstance(data, dict):
        logger.warning("Unrecognized issue position structure: %s", data)
        return None

    try:
        # PDF – has "page" key
        if "page" in data:
            coords = data.get("coordinates", [0.0, 0.0])
            return PdfIssuePosition(
                x=coords[0], y=coords[1], page=data["page"]
            )

        # Text – has "textBlockId" key
        if "textBlockId" in data:
            return TextIssuePosition(
                text_block_id=data["textBlockId"],
                start_char_index=data["startCharIndex"],
                end_char_index=data["endCharIndex"],
            )

        # Video – KeyframesGeoJSONPoint
        if data.get("type") == "KeyframesGeoJSONPoint":
            return VideoIssuePosition(
                frames=_parse_video_frames(data.get("keyframes", []))
            )

        # Image – plain GeoJSON Point
        if data.get("type") == "Point":
            coords = data.get("coordinates", [0, 0])
            return ImageIssuePosition(x=int(coords[0]), y=int(coords[1]))
    except (
        KeyError,
        IndexError,
        TypeError,
        ValueError,
        # int() on an infinite float (JSON ``Infinity`` / ``1e999``) raises
        # OverflowError, which is not a ValueError subclass.
        OverflowError,
    ) as exc:
        logger.warning(
            "Failed to deserialize issue position: %s (%s)", data, exc
        )
        return None

    logger.warning("Unrecognized issue position structure: %s", data)
    return None