diff --git a/README.md b/README.md index 4b3e795..c3429d0 100644 --- a/README.md +++ b/README.md @@ -392,6 +392,14 @@ The translator accepts the Qwen labels such as `front-right quarter view`, as the native camera nodes. `suppress_phone_visibility` is enabled by default so generic Qwen camera views do not add `phone hidden` or other phone wording. +For coworking-style locations, the prompt builder also uses the translated +camera geometry to add a location-aware framing sentence. It currently targets +`coworking lounge`, `business cafe`, and empty office scenes. There, the view +direction (front/side/back), zoom, and elevation determine which desks, windows, +laptop tables, glass partitions, counters, or office rows are kept visible. In +male-POV setups this becomes a first-person spatial description and the +external camera sentence is suppressed. + `SxCP Caption Naturalizer` rewrites tag-like captions or labeled prompts into more natural language. Connect the prompt builder's `metadata_json` output to `source_text` for the cleanest result. 
You can also connect `caption` or diff --git a/caption_naturalizer.py b/caption_naturalizer.py index 01f37ba..6c4b8d4 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -376,6 +376,7 @@ def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) - pose = _row_value(row, "pose", ("Pose",)) expression = "" if _expression_disabled(row) else _row_value(row, "expression", ("Facial expression", "Facial expressions")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) + camera_scene = _clean_text(row.get("camera_scene_directive")) prop = _row_value(row, "prop", ("Prop/detail",)) style = _row_value(row, "style") if keep_style else "" @@ -401,6 +402,8 @@ def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) - parts.append(f"{possessive_pronoun(subject)} expression is {expression}") if scene: parts.append(f"The setting is {scene}") + if _detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) if _detail_allows(detail_level) and composition: parts.append(f"The composition is {composition}") if keep_style and style: @@ -447,6 +450,7 @@ def _couple_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) - if not _expression_disabled(row): expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) + camera_scene = _clean_text(row.get("camera_scene_directive")) style = _row_value(row, "style") if keep_style else "" parts = [f"{_cap_first(subject)} are adults"] @@ -460,6 +464,8 @@ def _couple_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) - parts.append(f"The pose is {pose}") if scene: parts.append(f"The setting is {scene}") + if _detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) if expression: parts.append(f"Their expressions are {expression}") if 
_detail_allows(detail_level) and composition: @@ -484,6 +490,7 @@ def _configured_cast_from_row(row: dict[str, Any], detail_level: str, keep_style if not _expression_disabled(row): expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) + camera_scene = _clean_text(row.get("camera_scene_directive")) cast_descriptor_text = _row_value(row, "cast_descriptor_text", ("Characters", "Cast descriptors")) scene_kind = _row_value(row, "scene_kind") or "explicit adult sex scene" style = _row_value(row, "style") if keep_style else "" @@ -506,6 +513,8 @@ def _configured_cast_from_row(row: dict[str, Any], detail_level: str, keep_style scene_bits.append(f"framed as {composition}") if scene_bits and _detail_allows(detail_level): parts.append(", ".join(scene_bits)) + if _detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) if keep_style and style: parts.append(f"The visual style is {style}") return _join_sentences(parts), "metadata(configured_cast)" @@ -524,6 +533,7 @@ def _group_or_layout_from_row(row: dict[str, Any], detail_level: str, keep_style if not _expression_disabled(row): expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) + camera_scene = _clean_text(row.get("camera_scene_directive")) style = _row_value(row, "style") if keep_style else "" if primary == "layout scene": @@ -540,6 +550,8 @@ def _group_or_layout_from_row(row: dict[str, Any], detail_level: str, keep_style parts.append(f"They show {expression}") if scene: parts.append(f"The setting is {scene}") + if _detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) if _detail_allows(detail_level) and composition: parts.append(f"The composition is 
{composition}") if keep_style and style: diff --git a/krea_formatter.py b/krea_formatter.py index 7cfe3a6..61ad856 100644 --- a/krea_formatter.py +++ b/krea_formatter.py @@ -2140,6 +2140,10 @@ def _camera_phrase(row: dict[str, Any]) -> str: return "" +def _camera_scene_phrase(row: dict[str, Any]) -> str: + return _clean(row.get("camera_scene_directive")) + + def _camera_phrase_from_config(config: Any) -> str: if not isinstance(config, dict): return "" @@ -2219,6 +2223,7 @@ def _normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) flags=re.IGNORECASE, ) camera = _camera_phrase(row) + camera_scene = _camera_scene_phrase(row) style = _style_phrase(row, style_mode) if subject_type == "configured_cast" or _clean(row.get("cast_summary")): @@ -2264,6 +2269,7 @@ def _normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) f"A consensual explicit adult scene with {subject}" if not action else "", f"The cast includes {cast}" if cast and not cast_prose and not (women_count == 1 and men_count == 1) else "", f"The setting is {scene}" if scene else "", + camera_scene, _expression_phrase(expression), _composition_phrase(output_composition, action, "The image is framed as", detail_density), camera, @@ -2281,6 +2287,7 @@ def _normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) f"{pose}" if pose else "", f"with {expression}" if expression else "", f"in {scene}" if scene else "", + camera_scene, f"framed as {composition}" if composition else "", camera, style if detail_level != "concise" else "", @@ -2300,6 +2307,7 @@ def _normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) _couple_clothing_phrase(item) if item else "", f"The pose is {pose}" if pose else "", f"The setting is {scene}" if scene else "", + camera_scene, f"Facial expressions are {expression}" if expression else "", f"The image is framed as {composition}" if composition else "", camera, @@ -2312,6 +2320,7 @@ def 
_normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) f"{subject}", f"featuring {item}" if item else "", f"in {scene}" if scene else "", + camera_scene, f"with {expression}" if expression else "", f"framed as {composition}" if composition else "", camera, @@ -2332,6 +2341,8 @@ def _insta_pair_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) hard = row.get("hardcore_row") if isinstance(row.get("hardcore_row"), dict) else {} soft_camera = _pair_camera_phrase(row.get("softcore_camera_directive"), row.get("softcore_camera_config"), soft) hard_camera = _pair_camera_phrase(row.get("hardcore_camera_directive"), row.get("hardcore_camera_config"), hard) + soft_camera_scene = _camera_scene_phrase(soft) or _clean(row.get("softcore_camera_scene_directive")) + hard_camera_scene = _camera_scene_phrase(hard) or _clean(row.get("hardcore_camera_scene_directive")) soft_style = _style_phrase(soft, style_mode) hard_style = _style_phrase(hard, style_mode) options = row.get("options") if isinstance(row.get("options"), dict) else {} @@ -2452,6 +2463,7 @@ def _insta_pair_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) f"{soft.get('pose')}" if soft.get("pose") else "", _expression_phrase(soft_expression), f"in {soft.get('scene_text')}" if soft.get("scene_text") else "", + soft_camera_scene, f"framed as {soft_output_composition}" if soft_output_composition else "", soft_camera, soft_style if detail_level != "concise" else "", @@ -2465,6 +2477,7 @@ def _insta_pair_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) ), hard_cast_prose, f"set in {hard_scene}" if hard_scene else "", + hard_camera_scene, _expression_phrase(hard_expression), _composition_phrase(hard_output_composition, hard_action, detail_density=hard_detail_density), hard_camera, diff --git a/prompt_builder.py b/prompt_builder.py index 742bca2..4ed7778 100644 --- a/prompt_builder.py +++ b/prompt_builder.py @@ -3440,15 +3440,223 @@ def 
_camera_caption_text(parsed: dict[str, Any]) -> str: return f"{camera_mode} camera framing" +def _is_coworking_scene(scene_text: Any) -> bool: + text = str(scene_text or "").lower() + return any( + term in text + for term in ( + "coworking", + "cowork", + "office lounge", + "business cafe", + "work cafe", + "shared office", + "corporate office", + "office after hours", + "laptops", + "warm desks", + "repeating desks", + "glass partitions", + "copier alcove", + ) + ) + + +def _camera_geometry_phrase(parsed: dict[str, Any]) -> str: + direction = str(parsed.get("orbit_direction") or "").strip() + elevation = str(parsed.get("orbit_elevation_label") or "").strip() + distance = str(parsed.get("orbit_distance_label") or "").strip() + custom = str(parsed.get("custom_camera_prompt") or "").strip() + if not any((direction, elevation, distance)) and custom: + return custom + parts = [part for part in (direction, elevation, distance) if part and part != "auto"] + if parts: + return ", ".join(parts) + compact_parts = [ + CAMERA_COMPACT_LABELS.get(str(parsed.get(key) or ""), str(parsed.get(key) or "").replace("_", " ")) + for key in ("shot_size", "angle", "distance") + ] + compact_parts = [part for part in compact_parts if part and part != "auto"] + return ", ".join(compact_parts) + + +def _camera_direction_from_text(text: Any) -> str: + source = str(text or "").lower() + for label in ( + "front-right quarter view", + "right side view", + "back-right quarter view", + "back view", + "back-left quarter view", + "left side view", + "front-left quarter view", + "front view", + ): + if label in source: + return label + return "" + + +def _camera_elevation_from_text(text: Any) -> str: + source = str(text or "").lower() + for label in ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot"): + if label in source: + return label + return "" + + +def _camera_distance_from_text(text: Any) -> str: + source = str(text or "").lower() + for label in ("wide shot", "full-body 
shot", "three-quarter body shot", "medium shot", "close-up", "extreme close-up"): + if label in source: + return label + return "" + + +def _coworking_location_profile(scene_text: Any) -> dict[str, str]: + text = str(scene_text or "").lower() + if "business cafe" in text or "work cafe" in text or "cafe" in text: + return { + "place": "business cafe coworking counter", + "foreground": "counter edge, small plant, laptop corner, and polished phone-check surface", + "midground": "bar stools, warm desk lamps, coffee counter, and laptop users' empty work spots", + "background": "plants, mirror strip, menu wall, and repeated cafe work tables", + } + if "corporate office" in text or "office after hours" in text or "copier" in text: + return { + "place": "empty after-hours office", + "foreground": "copier alcove edge, office chair backs, and the nearest desk corner", + "midground": "repeating desks, glass partition seams, blinds, and muted monitor glow", + "background": "rows of empty workstations, city-light windows, and quiet office depth", + } + return { + "place": "coworking lounge", + "foreground": "nearest desk edge, laptop corner, chair back, and polished tabletop line", + "midground": "warm work desks, laptop tables, glass partition seams, and open walking aisle", + "background": "tall windows, repeated desk rows, plants, and soft shared-office depth", + } + + +def _coworking_direction_detail( + direction: str, + profile: dict[str, str], + pov_labels: list[str] | None = None, +) -> str: + direction = str(direction or "").strip().lower() + foreground = profile["foreground"] + midground = profile["midground"] + background = profile["background"] + if pov_labels: + if "right side" in direction: + return f"the visible partner is in right-side profile across the lower foreground: {foreground}; behind them, {midground} runs horizontally toward {background}" + if "left side" in direction: + return f"the visible partner is in left-side profile across the lower foreground: 
{foreground}; behind them, {midground} runs horizontally toward {background}" + if "back-right" in direction or "back-left" in direction: + return f"the viewer sees the visible partner from a rear-quarter angle, turning back over one shoulder; {foreground} sits at the lower edge while {midground} leads into {background}" + if direction == "back view": + return f"the viewer looks past the visible partner's back toward {midground}, then into {background}, with foreground body cues low in frame" + if "front-right" in direction or "front-left" in direction: + return f"the visible partner is close in a front-quarter view over the lower foreground: {foreground}; {midground} recede diagonally into {background}" + return f"the visible partner faces the viewer over the lower foreground: {foreground}; {midground} stacks clearly in front of {background}" + if "right side" in direction or "left side" in direction: + return f"the cast is held in clean side profile along the foreground anchor: {foreground}; {midground} creates horizontal perspective lines, with {background} still visible" + if "back-right" in direction or "back-left" in direction: + return f"the cast is viewed from a rear-quarter angle, partly turning back toward the camera; {foreground} stays low in frame while {midground} leads into {background}" + if direction == "back view": + return f"the cast is seen from behind with {foreground} at the camera side, facing into {midground} and {background}" + if "front-right" in direction or "front-left" in direction: + return f"the cast is placed beside the foreground anchor: {foreground}; {midground} recede diagonally into {background}" + return f"the cast faces the camera beside the foreground anchor: {foreground}; {midground} is layered between the cast and {background}" + + +def _coworking_distance_detail(distance: str, profile: dict[str, str]) -> str: + distance = str(distance or "").strip().lower() + if "wide" in distance or "full-body" in distance or "full body" in 
distance: + return f"Keep full bodies plus floor aisle, table rows, and enough {profile['background']} to read the whole {profile['place']}." + if "close" in distance: + return f"Crop close, but keep one concrete location anchor visible: {profile['foreground']} or a slice of {profile['midground']}." + return f"Use a medium crop: bodies stay dominant, but the foreground anchor ({profile['foreground']}) and one midground layer ({profile['midground']}) remain visible." + + +def _coworking_elevation_detail(elevation: str, profile: dict[str, str]) -> str: + elevation = str(elevation or "").strip().lower() + if "low-angle" in elevation: + return f"Low viewpoint: let {profile['foreground']} loom at the lower edge while windows and partitions rise behind the bodies." + if "elevated" in elevation: + return f"Elevated viewpoint: show tabletop surfaces, laptop rectangles, chair positions, and the walking aisle around the bodies." + if "high-angle" in elevation: + return f"High viewpoint: look down over the grid of desks, chairs, floor aisle, and body placement so the room layout is explicit." + return f"Eye-level viewpoint: keep tabletop lines and glass seams straight enough to make the {profile['place']} believable." 
+ + +def _coworking_camera_scene_directive( + scene_text: Any, + parsed: dict[str, Any], + pov_labels: list[str] | None = None, +) -> str: + if not _is_coworking_scene(scene_text): + return "" + direction = str(parsed.get("orbit_direction") or "").strip() + elevation = str(parsed.get("orbit_elevation_label") or "").strip() + distance = str(parsed.get("orbit_distance_label") or "").strip() + custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip() + direction = direction or _camera_direction_from_text(custom_prompt) + elevation = elevation or _camera_elevation_from_text(custom_prompt) + distance = distance or _camera_distance_from_text(custom_prompt) + if not any((direction, elevation, distance, custom_prompt)): + return "" + profile = _coworking_location_profile(scene_text) + direction_detail = _coworking_direction_detail(direction, profile, pov_labels) + distance_detail = _coworking_distance_detail(distance, profile) + elevation_detail = _coworking_elevation_detail(elevation, profile) + if pov_labels: + return ( + f"From the POV participant's position inside the {profile['place']}, {direction_detail}. " + f"{distance_detail} {elevation_detail} Use the multiangle camera only as spatial geometry for what the viewer can see." + ) + geometry = _camera_geometry_phrase(parsed) + geometry_clause = f" from a {geometry}" if geometry else "" + return ( + f"In the {profile['place']}{geometry_clause}, {direction_detail}. 
" + f"{distance_detail} {elevation_detail}" + ) + + +def _camera_scene_directive_for_context( + scene_text: Any, + composition: Any, + camera_config: str | dict[str, Any] | None, + pov_labels: list[str] | None = None, +) -> tuple[str, dict[str, Any]]: + parsed = _parse_camera_config(camera_config) + if parsed["camera_detail"] == "off" or parsed["camera_mode"] == "disabled": + return "", parsed + return _coworking_camera_scene_directive(scene_text, parsed, pov_labels), parsed + + def _apply_camera_config(row: dict[str, Any], camera_config: str | dict[str, Any] | None) -> dict[str, Any]: directive, parsed = _camera_directive(camera_config) + pov_labels = _pov_character_labels( + _character_slot_label_map(_parse_character_cast(row.get("character_cast_slots"))), + int(row.get("men_count") or 0) if str(row.get("men_count") or "").isdigit() else 0, + ) + if not pov_labels: + pov_labels = [str(label) for label in _list_from(row.get("pov_character_labels")) if str(label).strip()] + scene_directive, parsed = _camera_scene_directive_for_context( + row.get("scene_text") or row.get("source_scene_text") or row.get("scene"), + row.get("composition") or row.get("source_composition"), + parsed, + pov_labels, + ) row["camera_config"] = parsed - row["camera_directive"] = directive - if not directive: + row["camera_scene_directive"] = scene_directive + row["camera_directive"] = "" if pov_labels else directive + combined_directive = " ".join(part for part in (scene_directive, row["camera_directive"]) if part) + if not combined_directive: return row - row["prompt"] = _insert_positive_directive(row["prompt"], directive) + row["prompt"] = _insert_positive_directive(row["prompt"], combined_directive) camera_caption = _camera_caption_text(parsed) - if camera_caption: + if camera_caption and not pov_labels: row["caption"] = f"{row.get('caption', '').rstrip()}, {camera_caption}" return row @@ -7981,10 +8189,39 @@ def build_insta_of_pair( hard_camera_config = 
_insta_camera_config_with_detail(hard_camera_config, options["camera_detail"]) soft_camera_directive, soft_camera_config = _camera_directive(soft_camera_config) hard_camera_directive, hard_camera_config = _camera_directive(hard_camera_config) - soft_camera_sentence = f"Camera control: {soft_camera_directive} " if soft_camera_directive else "" - hard_camera_sentence = f"Camera control: {hard_camera_directive} " if hard_camera_directive else "" hard_scene = soft_row["scene_text"] if options["continuity"] == "same_creator_same_room" else hard_row["scene_text"] hard_composition = hard_row["composition"] + soft_pov_camera_labels = ( + pov_character_labels + if options["softcore_cast"] == "same_as_hardcore" + else [] + ) + soft_camera_scene_directive, soft_camera_config = _camera_scene_directive_for_context( + soft_row.get("scene_text"), + soft_row.get("composition"), + soft_camera_config, + soft_pov_camera_labels, + ) + hard_camera_scene_directive, hard_camera_config = _camera_scene_directive_for_context( + hard_scene, + hard_composition, + hard_camera_config, + pov_character_labels, + ) + if soft_pov_camera_labels: + soft_camera_directive = "" + if pov_character_labels: + hard_camera_directive = "" + soft_row["camera_config"] = soft_camera_config + soft_row["camera_directive"] = soft_camera_directive + soft_row["camera_scene_directive"] = soft_camera_scene_directive + hard_row["camera_config"] = hard_camera_config + hard_row["camera_directive"] = hard_camera_directive + hard_row["camera_scene_directive"] = hard_camera_scene_directive + soft_camera_scene_sentence = f"{soft_camera_scene_directive} " if soft_camera_scene_directive else "" + hard_camera_scene_sentence = f"{hard_camera_scene_directive} " if hard_camera_scene_directive else "" + soft_camera_sentence = f"Camera control: {soft_camera_directive} " if soft_camera_directive else "" + hard_camera_sentence = f"Camera control: {hard_camera_directive} " if hard_camera_directive else "" soft_cast = ( "solo creator 
setup with Woman A alone" if options["softcore_cast"] == "solo" @@ -8065,6 +8302,7 @@ def build_insta_of_pair( f"{soft_cast_presence}" f"{soft_cast_styling_sentence}" f"{soft_row['softcore_item_prompt_label']}: {soft_row['item']}. Pose: {soft_row['pose']}. Setting: {soft_row['scene_text']}. " + f"{soft_camera_scene_sentence}" f"{_labeled_expression_sentence('Facial expression', soft_row.get('expression'))}" f"Composition: {soft_row['composition']}. " f"{soft_camera_sentence}" @@ -8080,6 +8318,7 @@ def build_insta_of_pair( f"{hard_clothing_sentence}" f"Role graph: {hard_row['role_graph']} Sexual scene: {hard_row['item']}. " f"Setting: {hard_scene}. " + f"{hard_camera_scene_sentence}" f"{_labeled_expression_sentence('Facial expressions', hard_row.get('expression'))}" f"Composition: {hard_composition}. " f"{hard_detail_directive}" @@ -8104,6 +8343,7 @@ def build_insta_of_pair( soft_partner_outfit_text, soft_partner_styling["pose"], soft_row["scene_text"], + soft_camera_scene_directive, soft_row["composition"], _camera_caption_text(soft_camera_config) if soft_camera_directive else "", ] @@ -8117,6 +8357,7 @@ def build_insta_of_pair( hard_row["role_graph"], hard_row["item"], hard_scene, + hard_camera_scene_directive, hard_composition, _camera_caption_text(hard_camera_config) if hard_camera_directive else "", ] @@ -8150,5 +8391,7 @@ def build_insta_of_pair( "hardcore_camera_config": hard_camera_config, "softcore_camera_directive": soft_camera_directive, "hardcore_camera_directive": hard_camera_directive, + "softcore_camera_scene_directive": soft_camera_scene_directive, + "hardcore_camera_scene_directive": hard_camera_scene_directive, } return metadata