"""Position models for issues, varying by media type.
Each position model serializes to a GeoJSON-compatible dict for the
``position: Json`` GraphQL field.
"""
import json
import logging
from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel, field_validator
from labelbox.schema.media_type import MediaType
logger = logging.getLogger(__name__)
class ImageIssuePosition(BaseModel):
    """Location of an issue pin on an image asset.

    Attributes:
        x: Horizontal pixel coordinate of the pin.
        y: Vertical pixel coordinate of the pin.
    """

    x: int
    y: int

    def to_dict(self) -> dict:
        """Return the pin as a GeoJSON ``Point`` dict (``[x, y]`` order)."""
        point = [self.x, self.y]
        return dict(type="Point", coordinates=point)
class PdfIssuePosition(BaseModel):
    """Location of an issue pin on one page of a PDF.

    Both coordinates are fractions (0.0 – 1.0) of the page dimensions,
    which corresponds to the backend ``PERCENT`` unit.

    Attributes:
        x: Horizontal position as a fraction of page width (0.0 – 1.0).
        y: Vertical position as a fraction of page height (0.0 – 1.0).
        page: Zero-based page index.
    """

    x: float
    y: float
    page: int

    @field_validator("x", "y")
    @classmethod
    def _check_range(cls, v: float) -> float:
        """Accept *v* only if it lies in the closed interval [0.0, 1.0]."""
        if 0.0 <= v <= 1.0:
            return v
        raise ValueError(
            "PDF coordinates must be between 0.0 and 1.0 (percentage). "
            f"Got {v}."
        )

    def to_dict(self) -> dict:
        """Return a GeoJSON ``Point`` dict extended with page and unit."""
        payload: dict = {"type": "Point", "coordinates": [self.x, self.y]}
        payload["page"] = self.page
        # The unit is always emitted as PERCENT for PDF positions.
        payload["unit"] = "PERCENT"
        return payload
class TextIssuePosition(BaseModel):
    """A span of characters inside a text block.

    Attributes:
        text_block_id: Identifier of the text block.
        start_char_index: Index of the first character (inclusive).
        end_char_index: Index just past the last character (exclusive).
    """

    text_block_id: str
    start_char_index: int
    end_char_index: int

    def to_dict(self) -> dict:
        """Return the range as a dict with camelCase keys."""
        return dict(
            textBlockId=self.text_block_id,
            startCharIndex=self.start_char_index,
            endCharIndex=self.end_char_index,
        )
class VideoFrameRange(BaseModel):
    """One contiguous span of video frames, optionally with a moving pin.

    A single frame is expressed by setting ``start == end``; in that case
    ``end_x`` and ``end_y`` are not used when the range is serialized.

    Attributes:
        start: First frame number of the range.
        end: Last frame number of the range (same as *start* for a
            single frame).
        x: Horizontal pixel coordinate of the pin at *start*.
        y: Vertical pixel coordinate of the pin at *start*.
        end_x: Horizontal pixel coordinate of the pin at *end* (moving
            pin). Not used when ``start == end``.
        end_y: Vertical pixel coordinate of the pin at *end* (moving
            pin). Not used when ``start == end``.
    """

    start: int
    end: int
    x: int
    y: int
    end_x: Optional[int] = None
    end_y: Optional[int] = None
class VideoIssuePosition(BaseModel):
    """Issue pin location(s) on a video asset.

    Covers a single frame, one contiguous range, or several separate
    ranges, each of which may carry a moving pin.

    Attributes:
        frames: One or more :class:`VideoFrameRange` entries.
    """

    frames: List[VideoFrameRange]

    def to_dict(self) -> dict:
        """Serialize to KeyframesGeoJSONPoint format."""

        def _keyframe(frame_no, px, py) -> dict:
            # One keyframe: a frame number plus a GeoJSON Point value.
            return {
                "frame": frame_no,
                "value": {
                    "type": "Point",
                    "coordinates": [px, py],
                },
            }

        points: list = []
        for span in self.frames:
            points.append(_keyframe(span.start, span.x, span.y))
            if span.end == span.start:
                # Single-frame entry: the start keyframe is all we emit.
                continue
            # Multi-frame range: the end keyframe falls back to the start
            # coordinates for any end coordinate that was not provided.
            last_x = span.x if span.end_x is None else span.end_x
            last_y = span.y if span.end_y is None else span.end_y
            points.append(_keyframe(span.end, last_x, last_y))
        return {"type": "KeyframesGeoJSONPoint", "keyframes": points}
# Union of the concrete position models defined in this module; used as the
# return type of ``_deserialize_position``.
IssuePosition = Union[
ImageIssuePosition,
PdfIssuePosition,
TextIssuePosition,
VideoIssuePosition,
]
# Maps a MediaType to the position model class used for that media.
# Both ``MediaType.Document`` and ``MediaType.Pdf`` map to PdfIssuePosition.
MEDIA_TYPE_POSITION_MAP: Dict[MediaType, type] = {
MediaType.Image: ImageIssuePosition,
MediaType.Video: VideoIssuePosition,
MediaType.Text: TextIssuePosition,
MediaType.Document: PdfIssuePosition,
MediaType.Pdf: PdfIssuePosition,
}
def _parse_video_keyframes(kf_list: list) -> List[VideoFrameRange]:
    """Group a flat keyframe list into :class:`VideoFrameRange` entries.

    A keyframe followed by one with a different frame number is read as the
    start and end of a range (both are consumed). Any other keyframe becomes
    a single-frame range. Coordinates are converted with ``int()``.
    """
    frames: List[VideoFrameRange] = []
    total = len(kf_list)
    i = 0
    while i < total:
        kf = kf_list[i]
        start_frame = kf["frame"]
        start_coords = kf["value"]["coordinates"]
        # Look ahead for an end keyframe.
        if i + 1 < total:
            next_kf = kf_list[i + 1]
            next_coords = next_kf["value"]["coordinates"]
            end_frame = next_kf["frame"]
            if end_frame != start_frame:
                frames.append(
                    VideoFrameRange(
                        start=start_frame,
                        end=end_frame,
                        x=int(start_coords[0]),
                        y=int(start_coords[1]),
                        end_x=int(next_coords[0]),
                        end_y=int(next_coords[1]),
                    )
                )
                i += 2
                continue
        # Single frame or last entry.
        frames.append(
            VideoFrameRange(
                start=start_frame,
                end=start_frame,
                x=int(start_coords[0]),
                y=int(start_coords[1]),
            )
        )
        i += 1
    return frames


def _deserialize_position(
    raw: Optional[Union[str, dict]],
) -> Optional[IssuePosition]:
    """Convert a raw position value from GraphQL into a typed model.

    The payload shape selects the model: a ``"page"`` key means PDF, a
    ``"textBlockId"`` key means text, ``type == "KeyframesGeoJSONPoint"``
    means video, and ``type == "Point"`` means image (checked in that
    order).

    Args:
        raw: ``None``, a dict, or a JSON string encoding a dict.

    Returns:
        The matching position model, or ``None`` when *raw* is ``None`` /
        JSON ``null``, or when the payload cannot be interpreted. Every
        uninterpretable payload (invalid JSON, non-dict value, unknown or
        malformed structure) is logged as a warning rather than raised,
        ensuring forward-compatibility with new media types.
    """
    if raw is None:
        return None

    data: Any = raw  # Any: json.loads may yield any JSON value
    if isinstance(raw, str):
        try:
            data = json.loads(raw)
        except (json.JSONDecodeError, TypeError) as exc:
            logger.warning(
                "Issue position is not valid JSON: %s (%s)", raw, exc
            )
            return None
        if data is None:
            # JSON ``null`` means "no position"; not worth a warning.
            return None

    if not isinstance(data, dict):
        logger.warning("Unrecognized issue position structure: %s", data)
        return None

    try:
        # PDF – has "page" key
        if "page" in data:
            coords = data.get("coordinates", [0.0, 0.0])
            return PdfIssuePosition(
                x=coords[0], y=coords[1], page=data["page"]
            )
        # Text – has "textBlockId" key
        if "textBlockId" in data:
            return TextIssuePosition(
                text_block_id=data["textBlockId"],
                start_char_index=data["startCharIndex"],
                end_char_index=data["endCharIndex"],
            )
        # Video – KeyframesGeoJSONPoint
        if data.get("type") == "KeyframesGeoJSONPoint":
            return VideoIssuePosition(
                frames=_parse_video_keyframes(data.get("keyframes", []))
            )
        # Image – plain GeoJSON Point
        if data.get("type") == "Point":
            coords = data.get("coordinates", [0, 0])
            return ImageIssuePosition(x=int(coords[0]), y=int(coords[1]))
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        # NOTE(review): model validation failures are expected to arrive
        # here as ValueError (pydantic v2's ValidationError subclasses
        # ValueError) — confirm if the pydantic version changes.
        logger.warning(
            "Failed to deserialize issue position: %s (%s)", data, exc
        )
        return None

    logger.warning("Unrecognized issue position structure: %s", data)
    return None