"""Camera prompt helpers.

Builds JSON camera configurations (manual, orbit-based, or derived from a
Qwen multi-angle prompt / camera-info payload) and turns a parsed
configuration into a text directive for a prompt.
"""
from __future__ import annotations

import json
import math
import re
from typing import Any

CAMERA_DETAIL_CHOICES = ["off", "compact", "full"]

CAMERA_ORBIT_FRAMING_CHOICES = [
    "from_zoom",
    "wide",
    "medium",
    "full_body",
    "three_quarter",
    "close_up",
    "extreme_close_up",
]

CAMERA_ORBIT_FOCUS_CHOICES = [
    "auto",
    "face",
    "torso",
    "hips",
    "full_body",
    "action",
    "contact_points",
    "environment",
]

# Full-detail sentence emitted for each camera mode ("" means: say nothing).
CAMERA_MODE_PROMPTS = {
    "disabled": "",
    "standard": "",
    "handheld_selfie": (
        "Camera mode: handheld smartphone selfie, close arm-length framing, visible creator-shot perspective, "
        "slight wide-angle intimacy, direct eye contact, natural phone-camera composition."
    ),
    "mirror_selfie": (
        "Camera mode: mirror selfie with the phone visible in one hand, reflective framing, creator looking at the screen, "
        "body and environment visible through the mirror."
    ),
    "phone_tripod": (
        "Camera mode: phone on tripod or ring-light stand, creator-facing social-video framing, stable vertical composition, "
        "hands-free self-recorded setup."
    ),
    "creator_pov": (
        "Camera mode: creator-held POV, intimate subscriber-view angle, the creator controls the camera, close foreground body framing."
    ),
    "bed_selfie": (
        "Camera mode: bed selfie shot from a phone held above or beside the body, intimate close framing, sheets visible around the subject."
    ),
    "bathroom_mirror": (
        "Camera mode: bathroom mirror selfie, phone visible, tiled private room, close vertical framing, candid creator-shot energy."
    ),
    "phone_flash": (
        "Camera mode: direct phone-flash selfie, crisp flash highlights, candid night-post feeling, hard-edged smartphone shadows."
    ),
    "action_cam": (
        "Camera mode: body-mounted or handheld action-camera intimacy, very close wide-angle perspective, dynamic creator-shot framing."
    ),
}

# Short labels used by the "compact" directive, keyed by option value.
CAMERA_COMPACT_LABELS = {
    "disabled": "",
    "standard": "",
    "handheld_selfie": "handheld smartphone selfie",
    "mirror_selfie": "mirror selfie",
    "phone_tripod": "phone tripod / ring-light setup",
    "creator_pov": "creator-held POV",
    "bed_selfie": "bed selfie",
    "bathroom_mirror": "bathroom mirror selfie",
    "phone_flash": "phone-flash selfie",
    "action_cam": "handheld action-camera view",
    "full_body": "full body",
    "three_quarter": "three-quarter body",
    "waist_up": "waist-up",
    "close_up": "close-up",
    "extreme_close_up": "extreme close-up",
    "eye_level": "eye-level",
    "high_angle": "high-angle",
    "low_angle": "low-angle",
    "overhead": "overhead",
    "side_profile": "side-profile",
    "rear_view": "rear-view",
    "mirror_reflection": "mirror reflection",
    "smartphone_wide": "smartphone wide-angle",
    "ultra_wide": "ultra-wide",
    "portrait_lens": "phone portrait lens",
    "telephoto": "telephoto-style",
    "macro_detail": "macro detail",
    "arm_length": "arm-length",
    "near_body": "near-body",
    "bedside": "bedside phone",
    "room_corner": "room-corner phone",
    "vertical_story": "vertical 9:16",
    "square_feed": "square feed",
    "horizontal": "horizontal",
    "phone_visible": "phone visible",
    "phone_hidden": "phone hidden",
    "screen_reflection": "screen reflection",
    "ring_light_visible": "ring light visible",
}

CAMERA_SHOT_PROMPTS = {
    "auto": "",
    "full_body": "Shot size: full body visible, head-to-toe framing, no important body parts cropped out.",
    "three_quarter": "Shot size: three-quarter body framing, face, torso, hips, and thighs clearly visible.",
    "waist_up": "Shot size: waist-up creator framing with face and upper body as the focus.",
    "close_up": "Shot size: close-up framing with face, expression, hands, and body contact emphasized.",
    "extreme_close_up": "Shot size: extreme close-up detail shot, tightly framed and intimate.",
}

CAMERA_ANGLE_PROMPTS = {
    "auto": "",
    "eye_level": "Angle: eye-level camera angle with direct creator eye contact.",
    "high_angle": "Angle: high-angle selfie looking down toward the body.",
    "low_angle": "Angle: low-angle phone camera looking upward from near the body.",
    "overhead": "Angle: overhead phone shot looking down at the full pose.",
    "side_profile": "Angle: side-profile camera view emphasizing body silhouette and contact points.",
    "rear_view": "Angle: rear-view camera framing with the body turned away from the lens.",
    "mirror_reflection": "Angle: mirror-reflection composition with the phone and reflected body placement readable.",
}

CAMERA_LENS_PROMPTS = {
    "auto": "",
    "smartphone_wide": "Lens: smartphone wide-angle lens with slight edge distortion and close personal scale.",
    "ultra_wide": "Lens: ultra-wide phone lens, exaggerated near-camera perspective, environmental context visible.",
    "portrait_lens": "Lens: phone portrait mode, shallow depth of field, crisp subject separation.",
    "telephoto": "Lens: compressed telephoto-style framing, flatter proportions, less distortion.",
    "macro_detail": "Lens: macro-detail phone shot focused on texture, skin, fabric, and contact detail.",
}

CAMERA_DISTANCE_PROMPTS = {
    "auto": "",
    "arm_length": "Camera distance: arm-length selfie distance, close enough to feel handheld.",
    "near_body": "Camera distance: near-body camera placement with intimate foreground framing.",
    "bedside": "Camera distance: phone placed beside the body on the bed or floor.",
    "room_corner": "Camera distance: phone set across the room, self-recorded but wider and more observational.",
}

CAMERA_ORIENTATION_PROMPTS = {
    "auto": "",
    "vertical_story": "Orientation: vertical 9:16 story/reel framing.",
    "square_feed": "Orientation: square social-feed crop.",
    "horizontal": "Orientation: horizontal phone-video crop.",
}

CAMERA_PHONE_PROMPTS = {
    "auto": "",
    "phone_visible": "Phone visibility: phone visible in hand or mirror, clearly creator-shot.",
    "phone_hidden": "Phone visibility: phone is implied but not visible, preserving the selfie/creator-shot perspective.",
    "screen_reflection": "Phone visibility: screen glow or reflection visible in the scene.",
    "ring_light_visible": "Phone visibility: ring light or tripod visible enough to read as self-recorded content.",
}

CAMERA_PRIORITY_PROMPTS = {
    "soft_hint": "Camera priority: treat the camera notes as style guidance.",
    "strong": "Camera priority: strongly preserve the selected camera, lens, angle, crop, and phone-shot perspective.",
    "locked": "Camera priority: locked camera constraint; do not replace this with a studio, third-person, cinematic, or unrelated camera view.",
}

# Phrase -> value tables used to recover orbit values from a Qwen prompt.
# Lookup is by substring in dict order; the first hit wins.
QWEN_CAMERA_DIRECTIONS = {
    "front-right quarter view": 45,
    "right side view": 90,
    "back-right quarter view": 135,
    "back view": 180,
    "back-left quarter view": 225,
    "left side view": 270,
    "front-left quarter view": 315,
    "front view": 0,
}

QWEN_CAMERA_ELEVATIONS = {
    "low-angle shot": -30,
    "eye-level shot": 0,
    "elevated shot": 30,
    "high-angle shot": 60,
}

QWEN_CAMERA_ZOOMS = {
    "wide shot": 0.0,
    "medium shot": 5.0,
    "close-up": 8.0,
}

# Default y used for both camera position and target when the payload omits it.
QWEN_CAMERA_SCENE_CENTER_Y = 0.5


def _is_false(value: Any) -> bool:
    """Return True only for ``False`` or a string spelling of false.

    Recognised strings (case-insensitive, surrounding whitespace ignored):
    ``"false"``, ``"0"``, ``"no"``, ``"off"``. Every other value, including
    ``None`` and the integer ``0``, is reported as not-false.
    """
    if isinstance(value, bool):
        return value is False
    if isinstance(value, str):
        return value.strip().lower() in ("false", "0", "no", "off")
    return False


def _choice(value: Any, choices: dict[str, str], default: str) -> str:
    """Return ``value`` as a string if it is a key of ``choices``, else ``default``."""
    value = str(value or default)
    return value if value in choices else default


def _clean_prompt_punctuation(text: str) -> str:
    """Collapse whitespace and tidy doubled or dangling punctuation in ``text``."""
    text = re.sub(r"\s+", " ", str(text or "")).strip()
    text = re.sub(r"\s+([,.;:])", r"\1", text)  # no space before punctuation
    text = re.sub(r"(?:,\s*){2,}", ", ", text)  # runs of commas -> one comma
    text = re.sub(r"\.\s*\.", ".", text)  # ". ." -> "."
    text = re.sub(r":\s*\.", ".", text)  # ":." -> "."
    return text.strip()


def camera_mode_choices() -> list[str]:
    """Return the available camera mode keys."""
    return list(CAMERA_MODE_PROMPTS)


def camera_detail_choices() -> list[str]:
    """Return a copy of the camera detail levels."""
    return list(CAMERA_DETAIL_CHOICES)


def camera_orbit_framing_choices() -> list[str]:
    """Return a copy of the orbit framing options."""
    return list(CAMERA_ORBIT_FRAMING_CHOICES)


def camera_orbit_focus_choices() -> list[str]:
    """Return a copy of the orbit subject-focus options."""
    return list(CAMERA_ORBIT_FOCUS_CHOICES)


def camera_shot_choices() -> list[str]:
    """Return the available shot-size keys."""
    return list(CAMERA_SHOT_PROMPTS)


def camera_angle_choices() -> list[str]:
    """Return the available camera-angle keys."""
    return list(CAMERA_ANGLE_PROMPTS)


def camera_lens_choices() -> list[str]:
    """Return the available lens keys."""
    return list(CAMERA_LENS_PROMPTS)


def camera_distance_choices() -> list[str]:
    """Return the available camera-distance keys."""
    return list(CAMERA_DISTANCE_PROMPTS)


def camera_orientation_choices() -> list[str]:
    """Return the available orientation keys."""
    return list(CAMERA_ORIENTATION_PROMPTS)


def camera_phone_choices() -> list[str]:
    """Return the available phone-visibility keys."""
    return list(CAMERA_PHONE_PROMPTS)


def camera_priority_choices() -> list[str]:
    """Return the available camera-priority keys."""
    return list(CAMERA_PRIORITY_PROMPTS)


def build_camera_config_json(
    camera_mode: str = "standard",
    shot_size: str = "auto",
    angle: str = "auto",
    lens: str = "auto",
    distance: str = "auto",
    orientation: str = "auto",
    phone_visibility: str = "auto",
    priority: str = "strong",
    camera_detail: str = "compact",
) -> str:
    """Serialise the given camera options to a sorted-key JSON object string.

    Values are stored as passed; validation happens in ``parse_camera_config``.
    """
    return json.dumps(
        {
            "camera_mode": camera_mode,
            "shot_size": shot_size,
            "angle": angle,
            "lens": lens,
            "distance": distance,
            "orientation": orientation,
            "phone_visibility": phone_visibility,
            "priority": priority,
            "camera_detail": camera_detail,
        },
        ensure_ascii=True,
        sort_keys=True,
    )


def _camera_orbit_direction(horizontal_angle: Any) -> str:
    """Map a horizontal angle in degrees to one of eight 45-degree view labels.

    The angle is wrapped into ``[0, 360)`` and compared as a float, so that
    fractional inputs land on the correct side of the half-degree thresholds.
    """
    h_angle = float(horizontal_angle or 0) % 360
    if h_angle < 22.5 or h_angle >= 337.5:
        return "front view"
    if h_angle < 67.5:
        return "front-right quarter view"
    if h_angle < 112.5:
        return "right side view"
    if h_angle < 157.5:
        return "back-right quarter view"
    if h_angle < 202.5:
        return "back view"
    if h_angle < 247.5:
        return "back-left quarter view"
    if h_angle < 292.5:
        return "left side view"
    return "front-left quarter view"


def _camera_orbit_elevation(vertical_angle: Any) -> str:
    """Map a vertical angle in degrees to an elevation label."""
    # Compare as float: truncating first would turn e.g. -15.5 into -15.
    vertical = float(vertical_angle or 0)
    if vertical < -15:
        return "low-angle shot"
    if vertical < 15:
        return "eye-level shot"
    if vertical < 45:
        return "elevated shot"
    return "high-angle shot"


def _camera_orbit_distance(zoom: Any, framing: str = "from_zoom") -> str:
    """Return the shot-distance label.

    An explicit ``framing`` wins; ``"from_zoom"`` (or any unknown framing)
    derives the label from ``zoom`` instead.
    """
    framing = framing if framing in CAMERA_ORBIT_FRAMING_CHOICES else "from_zoom"
    framing_labels = {
        "wide": "wide shot",
        "medium": "medium shot",
        "full_body": "full-body shot",
        "three_quarter": "three-quarter body shot",
        "close_up": "close-up",
        "extreme_close_up": "extreme close-up",
    }
    if framing != "from_zoom":
        return framing_labels[framing]
    zoom_value = float(zoom or 0.0)
    if zoom_value < 2:
        return "wide shot"
    if zoom_value < 6:
        return "medium shot"
    return "close-up"


def _camera_orbit_focus(subject_focus: str) -> str:
    """Return the focus phrase for ``subject_focus``; "" for "auto" or unknown."""
    return {
        "face": "face and expression centered",
        "torso": "torso and hands centered",
        "hips": "hips and lower body centered",
        "full_body": "full body centered",
        "action": "main action centered",
        "contact_points": "body contact points centered",
        "environment": "subject and room both readable",
    }.get(str(subject_focus or "auto"), "")


def camera_orbit_prompt(
    horizontal_angle: Any,
    vertical_angle: Any,
    zoom: Any,
    framing: str = "from_zoom",
    subject_focus: str = "auto",
    include_degrees: bool = True,
) -> tuple[str, dict[str, Any]]:
    """Describe an orbit camera position as text plus metadata.

    Args:
        horizontal_angle: Azimuth in degrees; truncated to an int and wrapped
            into ``[0, 360)``.
        vertical_angle: Elevation in degrees; truncated and clamped to
            ``[-90, 90]``.
        zoom: Zoom value; clamped to ``[0.0, 10.0]``.
        framing: One of ``CAMERA_ORBIT_FRAMING_CHOICES``; unknown values are
            treated as ``"from_zoom"``.
        subject_focus: One of ``CAMERA_ORBIT_FOCUS_CHOICES``; unknown values
            are treated as ``"auto"``.
        include_degrees: Prefix the prompt with ``"<azimuth>-degree "``.

    Returns:
        ``(prompt, metadata)`` where ``metadata`` holds the ``orbit_*`` keys.

    Raises:
        ValueError: If an angle or zoom cannot be converted to a number.
    """
    # Azimuth is periodic: wrap rather than clamp, so that -45 means 315
    # (front-left) and 360 means 0 instead of both collapsing to the range ends.
    azimuth = int(float(horizontal_angle or 0)) % 360
    elevation = max(-90, min(90, int(float(vertical_angle or 0))))
    zoom_value = max(0.0, min(10.0, float(zoom or 0.0)))

    direction = _camera_orbit_direction(azimuth)
    elevation_label = _camera_orbit_elevation(elevation)
    distance_label = _camera_orbit_distance(zoom_value, framing)
    focus_label = _camera_orbit_focus(subject_focus)

    pieces = [direction, elevation_label, distance_label, focus_label]
    prompt = ", ".join(piece for piece in pieces if piece)
    if include_degrees:
        prompt = f"{azimuth}-degree {prompt}"

    return prompt, {
        "orbit_azimuth": azimuth,
        "orbit_elevation": elevation,
        "orbit_zoom": zoom_value,
        "orbit_direction": direction,
        "orbit_elevation_label": elevation_label,
        "orbit_distance_label": distance_label,
        "orbit_framing": framing if framing in CAMERA_ORBIT_FRAMING_CHOICES else "from_zoom",
        "orbit_focus": subject_focus if subject_focus in CAMERA_ORBIT_FOCUS_CHOICES else "auto",
    }


def build_camera_orbit_config_json(
    enabled: bool = True,
    camera_mode: str = "standard",
    horizontal_angle: int = 0,
    vertical_angle: int = 0,
    zoom: float = 5.0,
    framing: str = "from_zoom",
    subject_focus: str = "auto",
    lens: str = "auto",
    orientation: str = "auto",
    phone_visibility: str = "auto",
    priority: str = "locked",
    camera_detail: str = "compact",
    include_degrees: bool = True,
) -> str:
    """Build a JSON camera config whose custom prompt comes from orbit values.

    When ``enabled`` is false (per ``_is_false``), the mode is ``"disabled"``
    and the custom prompt is empty; the ``orbit_*`` metadata is still included.
    Unknown option values fall back to their defaults.
    """
    orbit_prompt, orbit_metadata = camera_orbit_prompt(
        horizontal_angle,
        vertical_angle,
        zoom,
        framing=framing,
        subject_focus=subject_focus,
        include_degrees=include_degrees,
    )
    disabled = _is_false(enabled)
    config = {
        "camera_mode": "disabled" if disabled else _choice(camera_mode, CAMERA_MODE_PROMPTS, "standard"),
        "shot_size": "auto",
        "angle": "auto",
        "lens": _choice(lens, CAMERA_LENS_PROMPTS, "auto"),
        "distance": "auto",
        "orientation": _choice(orientation, CAMERA_ORIENTATION_PROMPTS, "auto"),
        "phone_visibility": _choice(phone_visibility, CAMERA_PHONE_PROMPTS, "auto"),
        "priority": _choice(priority, CAMERA_PRIORITY_PROMPTS, "locked"),
        "camera_detail": camera_detail if camera_detail in CAMERA_DETAIL_CHOICES else "compact",
        "camera_source": "orbit",
        "custom_camera_prompt": "" if disabled else orbit_prompt,
        **orbit_metadata,
    }
    return json.dumps(config, ensure_ascii=True, sort_keys=True)


def _qwen_prompt_camera_values(qwen_prompt: Any) -> tuple[int, int, float]:
    """Extract ``(horizontal_angle, vertical_angle, zoom)`` from a Qwen prompt.

    Each value is taken from the first phrase of the matching ``QWEN_CAMERA_*``
    table found in the lower-cased prompt; values with no matching phrase keep
    the defaults ``0``, ``0`` and ``5.0``.
    """
    text = _clean_prompt_punctuation(str(qwen_prompt or "").lower().replace(",", " "))
    horizontal_angle = 0
    vertical_angle = 0
    zoom = 5.0
    for label, value in QWEN_CAMERA_DIRECTIONS.items():
        if label in text:
            horizontal_angle = value
            break
    for label, value in QWEN_CAMERA_ELEVATIONS.items():
        if label in text:
            vertical_angle = value
            break
    for label, value in QWEN_CAMERA_ZOOMS.items():
        if label in text:
            zoom = value
            break
    return horizontal_angle, vertical_angle, zoom


def _camera_info_dict(camera_info: Any) -> dict[str, Any] | None:
    """Return ``camera_info`` as a dict, parsing JSON text if needed.

    Returns ``None`` for empty input, invalid JSON, JSON that is not an
    object, or any other input type.
    """
    if not camera_info:
        return None
    if isinstance(camera_info, dict):
        return camera_info
    if isinstance(camera_info, str):
        try:
            raw = json.loads(camera_info)
        except json.JSONDecodeError:
            return None
        return raw if isinstance(raw, dict) else None
    return None


def _qwen_camera_info_values(camera_info: Any) -> tuple[int, int, float] | None:
    """Derive ``(horizontal_angle, vertical_angle, zoom)`` from a camera payload.

    Reads the ``position`` and ``target`` sub-dicts (``x``/``y``/``z``) and
    works from the vector pointing from target to position.

    Returns:
        The three values, or ``None`` when the payload is unusable: missing or
        empty, non-numeric or non-finite coordinates, or zero distance.
    """
    info = _camera_info_dict(camera_info)
    if not info:
        return None
    position = info.get("position") if isinstance(info.get("position"), dict) else {}
    target = info.get("target") if isinstance(info.get("target"), dict) else {}
    try:
        dx = float(position.get("x", 0.0)) - float(target.get("x", 0.0))
        dy = float(position.get("y", QWEN_CAMERA_SCENE_CENTER_Y)) - float(
            target.get("y", QWEN_CAMERA_SCENE_CENTER_Y)
        )
        dz = float(position.get("z", 0.0)) - float(target.get("z", 0.0))
    except (TypeError, ValueError):
        return None
    # json.loads accepts NaN/Infinity; NaN would otherwise reach round() below
    # and raise ValueError instead of letting the caller fall back.
    if not all(math.isfinite(delta) for delta in (dx, dy, dz)):
        return None
    distance = math.sqrt(dx * dx + dy * dy + dz * dz)
    if distance <= 0:
        return None
    horizontal_angle = int(round(math.degrees(math.atan2(dx, dz)))) % 360
    vertical_angle = int(round(math.degrees(math.asin(max(-1.0, min(1.0, dy / distance))))))
    # Linear map from distance to zoom: 2.6 -> 0 and 0.6 -> 10, clamped.
    zoom = max(0.0, min(10.0, ((2.6 - distance) / 2.0) * 10.0))
    return horizontal_angle, vertical_angle, round(zoom, 2)


def build_qwen_camera_config_json(
    qwen_prompt: str = "",
    camera_info: Any = None,
    prefer_camera_info: bool = True,
    camera_mode: str = "standard",
    subject_focus: str = "auto",
    lens: str = "auto",
    orientation: str = "auto",
    phone_visibility: str = "auto",
    priority: str = "locked",
    camera_detail: str = "compact",
    include_degrees: bool = False,
    suppress_phone_visibility: bool = True,
) -> str:
    """Build an orbit camera config from Qwen multi-angle data.

    Orbit values come from ``camera_info`` when ``prefer_camera_info`` is
    truthy and the payload is usable; otherwise they are parsed from
    ``qwen_prompt``. Unless ``suppress_phone_visibility`` is false (per
    ``_is_false``), ``phone_visibility`` is forced to ``"auto"``.

    The result records ``camera_source``, the stripped ``qwen_prompt`` and,
    when the payload was usable, ``qwen_camera_info_values``.
    """
    info_values = _qwen_camera_info_values(camera_info)
    if prefer_camera_info and info_values is not None:
        horizontal_angle, vertical_angle, zoom = info_values
        source = "qwen_multiangle_camera_info"
    else:
        horizontal_angle, vertical_angle, zoom = _qwen_prompt_camera_values(qwen_prompt)
        source = "qwen_multiangle_prompt"

    config = json.loads(
        build_camera_orbit_config_json(
            enabled=True,
            camera_mode=camera_mode,
            horizontal_angle=horizontal_angle,
            vertical_angle=vertical_angle,
            zoom=zoom,
            framing="from_zoom",
            subject_focus=subject_focus,
            lens=lens,
            orientation=orientation,
            phone_visibility="auto" if not _is_false(suppress_phone_visibility) else phone_visibility,
            priority=priority,
            camera_detail=camera_detail,
            include_degrees=include_degrees,
        )
    )
    config["camera_source"] = source
    config["qwen_prompt"] = str(qwen_prompt or "").strip()
    if info_values is not None:
        config["qwen_camera_info_values"] = {
            "horizontal_angle": info_values[0],
            "vertical_angle": info_values[1],
            "zoom": info_values[2],
        }
    return json.dumps(config, ensure_ascii=True, sort_keys=True)


def parse_camera_config(camera_config: str | dict[str, Any] | None) -> dict[str, Any]:
    """Parse and normalise a camera config.

    Args:
        camera_config: A dict, a JSON object string, or a falsy value.

    Returns:
        A dict with every standard key set to a valid choice (unknown values
        fall back to the defaults). ``custom_camera_prompt`` and
        ``camera_source`` are added when non-empty, and ``orbit_*`` keys are
        copied through when present. A falsy input returns the defaults.

    Raises:
        ValueError: If the input is invalid JSON or not a JSON object.
    """
    defaults = {
        "camera_mode": "standard",
        "shot_size": "auto",
        "angle": "auto",
        "lens": "auto",
        "distance": "auto",
        "orientation": "auto",
        "phone_visibility": "auto",
        "priority": "strong",
        "camera_detail": "compact",
    }
    if not camera_config:
        return defaults
    if isinstance(camera_config, dict):
        raw = camera_config
    else:
        try:
            raw = json.loads(str(camera_config))
        except json.JSONDecodeError as exc:
            raise ValueError(f"Invalid camera_config JSON: {exc}") from exc
    if not isinstance(raw, dict):
        raise ValueError("camera_config must be a JSON object")

    parsed = {**defaults, **raw}
    custom_camera_prompt = _clean_prompt_punctuation(parsed.get("custom_camera_prompt", "")).rstrip(".")
    camera_source = str(parsed.get("camera_source") or "")

    camera_detail = str(parsed.get("camera_detail") or defaults["camera_detail"])
    if camera_detail not in CAMERA_DETAIL_CHOICES:
        camera_detail = defaults["camera_detail"]

    normalized = {
        "camera_mode": _choice(parsed.get("camera_mode"), CAMERA_MODE_PROMPTS, defaults["camera_mode"]),
        "shot_size": _choice(parsed.get("shot_size"), CAMERA_SHOT_PROMPTS, defaults["shot_size"]),
        "angle": _choice(parsed.get("angle"), CAMERA_ANGLE_PROMPTS, defaults["angle"]),
        "lens": _choice(parsed.get("lens"), CAMERA_LENS_PROMPTS, defaults["lens"]),
        "distance": _choice(parsed.get("distance"), CAMERA_DISTANCE_PROMPTS, defaults["distance"]),
        "orientation": _choice(parsed.get("orientation"), CAMERA_ORIENTATION_PROMPTS, defaults["orientation"]),
        "phone_visibility": _choice(parsed.get("phone_visibility"), CAMERA_PHONE_PROMPTS, defaults["phone_visibility"]),
        "priority": _choice(parsed.get("priority"), CAMERA_PRIORITY_PROMPTS, defaults["priority"]),
        "camera_detail": camera_detail,
    }
    if custom_camera_prompt:
        normalized["custom_camera_prompt"] = custom_camera_prompt
    if camera_source:
        normalized["camera_source"] = camera_source
    for key in (
        "orbit_azimuth",
        "orbit_elevation",
        "orbit_zoom",
        "orbit_direction",
        "orbit_elevation_label",
        "orbit_distance_label",
        "orbit_framing",
        "orbit_focus",
    ):
        if key in parsed:
            normalized[key] = parsed[key]
    return normalized


def camera_config_with_mode(camera_config: str | dict[str, Any] | None, camera_mode: str) -> dict[str, Any]:
    """Parse ``camera_config`` and optionally override its camera mode.

    The override is skipped when ``camera_mode`` is empty or
    ``"from_camera_config"``; an unknown mode keeps the parsed one.
    """
    parsed = parse_camera_config(camera_config)
    if camera_mode and camera_mode != "from_camera_config":
        parsed["camera_mode"] = _choice(camera_mode, CAMERA_MODE_PROMPTS, parsed["camera_mode"])
    return parsed


def camera_directive(camera_config: str | dict[str, Any] | None) -> tuple[str, dict[str, Any]]:
    """Render a camera config as a prompt directive.

    Returns:
        ``(directive, parsed_config)``. The directive is "" when the detail
        level is ``"off"``, the mode is ``"disabled"``, or nothing other than
        defaults is selected. ``"compact"`` yields a single
        ``"Camera: a, b, c."`` sentence; ``"full"`` joins the long-form
        sentences and appends the priority sentence.

    Raises:
        ValueError: Propagated from ``parse_camera_config``.
    """
    parsed = parse_camera_config(camera_config)
    if parsed["camera_detail"] == "off" or parsed["camera_mode"] == "disabled":
        return "", parsed

    custom_camera_prompt = str(parsed.get("custom_camera_prompt") or "").strip()

    if parsed["camera_detail"] == "compact":
        values = [
            parsed["camera_mode"],
            parsed["shot_size"],
            parsed["angle"],
            parsed["lens"],
            parsed["distance"],
            parsed["orientation"],
            parsed["phone_visibility"],
        ]
        labels = [CAMERA_COMPACT_LABELS.get(value, value.replace("_", " ")) for value in values]
        # Drop "auto" selections and modes whose compact label is empty.
        labels = [label for value, label in zip(values, labels) if label and value != "auto"]
        if custom_camera_prompt:
            labels.append(custom_camera_prompt)
        if not labels:
            return "", parsed
        directive = "Camera: " + ", ".join(labels) + "."
        if parsed["priority"] == "locked":
            directive += " Keep this camera framing."
        return directive, parsed

    parts = [
        CAMERA_MODE_PROMPTS[parsed["camera_mode"]],
        CAMERA_SHOT_PROMPTS[parsed["shot_size"]],
        CAMERA_ANGLE_PROMPTS[parsed["angle"]],
        CAMERA_LENS_PROMPTS[parsed["lens"]],
        CAMERA_DISTANCE_PROMPTS[parsed["distance"]],
        CAMERA_ORIENTATION_PROMPTS[parsed["orientation"]],
        CAMERA_PHONE_PROMPTS[parsed["phone_visibility"]],
    ]
    if custom_camera_prompt:
        parts.append(f"Camera orbit: {custom_camera_prompt}.")
    parts = [part for part in parts if part]
    if not parts:
        return "", parsed
    parts.append(CAMERA_PRIORITY_PROMPTS[parsed["priority"]])
    return " ".join(parts), parsed


def camera_caption_text(parsed: dict[str, Any]) -> str:
    """Return a short caption fragment for a parsed camera config.

    Uses the custom camera prompt when present; otherwise
    ``"<mode> camera framing"``, or "" for an empty or ``"standard"`` mode.
    """
    custom_camera_prompt = str(parsed.get("custom_camera_prompt") or "").strip()
    if custom_camera_prompt:
        return custom_camera_prompt
    camera_mode = str(parsed.get("camera_mode") or "").replace("_", " ").strip()
    if not camera_mode or camera_mode == "standard":
        return ""
    return f"{camera_mode} camera framing"