from __future__ import annotations

import re
from typing import Any


# Strings that carry no real content. ``value_texts`` drops any string whose
# cleaned, lower-cased form is a member of this set.
PLACEHOLDER_VALUES = {"", "any", "auto", "random", "none", "null"}
# Dict keys that ``value_texts`` tries, in this order, before falling back to
# flattening every value of the dict; the first key that yields text wins.
PREFERRED_VALUE_KEYS = ("text", "prompt", "template", "value", "name")
# NOTE(review): not referenced in the visible part of this module; presumably
# meant to be passed as ``skip_keys`` by callers so bookkeeping axes are left
# out of the generated text -- confirm against call sites.
METADATA_AXIS_KEYS = {"action_family", "position_family", "position_key", "position_keys"}
# Axis keys consumed by ``action_context_text``, in output order. That function
# disables unprioritized keys, so only the axes listed here contribute.
ACTION_CONTEXT_PRIORITY = (
    "position",
    "body_position",
    "body_arrangement",
    "arrangement",
    "angle",
    "surface",
    "body_contact",
    "leg_detail",
    "outer_act",
    "contact_detail",
    "texture_detail",
    "hand_detail",
    "visibility",
    "expression_detail",
    "oral_act",
    "oral_detail",
    "penetration_act",
    "penetration_detail",
    "anal_act",
    "double_act",
    "threesome_act",
    "group_act",
)


def clean_text(value: Any) -> str:
    """Return *value* as a single-line, whitespace-normalized string.

    ``None`` becomes the empty string; anything else is converted with
    ``str()``. Runs of whitespace (including newlines) collapse to one space,
    the ends are trimmed, and whitespace directly before ``,``, ``.``, ``;``
    or ``:`` is removed.
    """
    raw = str(value) if value is not None else ""
    # Newlines are turned into spaces first, then every whitespace run is
    # squeezed down to a single space.
    single_line = re.sub(r"\s+", " ", raw.replace("\n", " ")).strip()
    # Pull punctuation back against the preceding word ("a ," -> "a,").
    return re.sub(r"\s+([,.;:])", r"\1", single_line)


def value_texts(value: Any) -> list[str]:
    """Flatten *value* into a list of usable text fragments.

    * Strings are normalized with ``clean_text`` and stripped of surrounding
      spaces and periods; empty results and case-insensitive matches against
      ``PLACEHOLDER_VALUES`` are discarded.
    * Lists are flattened recursively, preserving element order.
    * Dicts return the texts of the first key in ``PREFERRED_VALUE_KEYS`` that
      produces any; if none does, every value is flattened in dict order.
    * ``None``, numbers, booleans and every other type yield an empty list.
    """
    if isinstance(value, str):
        candidate = clean_text(value).strip(" .")
        if not candidate or candidate.lower() in PLACEHOLDER_VALUES:
            return []
        return [candidate]

    if isinstance(value, list):
        return [text for element in value for text in value_texts(element)]

    if isinstance(value, dict):
        for key in PREFERRED_VALUE_KEYS:
            chosen = value_texts(value.get(key))
            if chosen:
                return chosen
        # No preferred key produced text: fall back to every value in order.
        return [text for element in value.values() for text in value_texts(element)]

    # Scalars (None, int, float, bool) and unsupported containers carry no text.
    return []


def axis_value_texts(
    axis_values: Any,
    *,
    priority: tuple[str, ...] = (),
    include_unprioritized: bool = True,
    skip_keys: set[str] | frozenset[str] | tuple[str, ...] = (),
    existing_text: Any = "",
) -> list[str]:
    """Collect de-duplicated text fragments from a mapping of axis values.

    Args:
        axis_values: Mapping of axis key to value; any non-dict input yields
            an empty list.
        priority: Keys to visit first, in the given order. Keys absent from
            ``axis_values`` are ignored.
        include_unprioritized: When true, the remaining keys of
            ``axis_values`` are visited afterwards in dict order.
        skip_keys: Keys to leave out; entries are compared after ``str()``.
        existing_text: Text already present elsewhere. A fragment is dropped
            when its lower-cased form occurs as a substring of the cleaned,
            lower-cased ``existing_text``.

    Returns:
        Fragments in visiting order, each cleaned and stripped of surrounding
        spaces and periods, with case-insensitive duplicates removed.
    """
    if not isinstance(axis_values, dict):
        return []

    excluded = {str(key) for key in skip_keys}

    # Prioritized keys that actually exist come first, optionally followed by
    # every key of the mapping; dict.fromkeys keeps first-seen order while
    # discarding repeats.
    candidates = [key for key in priority if key in axis_values]
    if include_unprioritized:
        candidates.extend(axis_values)
    ordered_keys = list(dict.fromkeys(key for key in candidates if key not in excluded))

    haystack = clean_text(existing_text).lower()
    collected: list[str] = []
    emitted: set[str] = set()
    for key in ordered_keys:
        for raw in value_texts(axis_values.get(key)):
            fragment = clean_text(raw).strip(" .")
            if not fragment:
                continue
            folded = fragment.lower()
            if folded in emitted:
                continue
            # Skip anything the surrounding text already says.
            if haystack and folded in haystack:
                continue
            emitted.add(folded)
            collected.append(fragment)
    return collected


def action_context_text(axis_values: Any) -> str:
    """Build a space-joined context string from the prioritized action axes.

    Only keys listed in ``ACTION_CONTEXT_PRIORITY`` contribute, in that order;
    all other keys of ``axis_values`` are ignored. Returns an empty string
    when nothing usable is found.
    """
    fragments = axis_value_texts(
        axis_values,
        priority=ACTION_CONTEXT_PRIORITY,
        include_unprioritized=False,
    )
    return " ".join(fragments)


def row_axis_value_texts(
    row: dict[str, Any],
    *,
    skip_keys: set[str] | frozenset[str] | tuple[str, ...] = (),
    existing_text: Any = "",
) -> list[str]:
    """Return the axis text fragments stored under ``row["item_axis_values"]``.

    A non-dict ``row`` yields an empty list. ``skip_keys`` and
    ``existing_text`` are forwarded unchanged to ``axis_value_texts``.
    """
    if isinstance(row, dict):
        axis_values = row.get("item_axis_values")
        return axis_value_texts(
            axis_values,
            skip_keys=skip_keys,
            existing_text=existing_text,
        )
    return []