Add prompt control and filter options

2026-06-24 14:32:54 +02:00
parent 2b64499687
commit cb35e1881f
5 changed files with 592 additions and 99 deletions
@@ -2782,21 +2782,122 @@ WHITE_KEYWORDS = (
    "french",
    "mediterranean",
 )
+# Heritage keyword groups. by_ethnicity() tests each keyword as a lower-case
+# substring of an entry's skin field, so a short keyword also matches any
+# longer word that contains it.
+EAST_ASIAN_KEYWORDS = (
+    "east asian",
+    "japanese",
+    "korean",
+    "chinese",
+    "taiwanese",
+    "mongolian",
+    "tibetan",
+    "manchu",
+    "okinawan",
+)
+# NOTE(review): "malay" is also a substring of "malayali" (listed under
+# SOUTH_ASIAN_KEYWORDS), so under substring matching this group also matches
+# entries that mention "malayali" - confirm that is acceptable.
+SOUTHEAST_ASIAN_KEYWORDS = (
+    "southeast asian",
+    "vietnamese",
+    "thai",
+    "filipina",
+    "filipino",
+    "indonesian",
+    "malay",
+    "cambodian",
+    "lao",
+    "burmese",
+    "singaporean",
+    "hmong",
+    "balinese",
+)
+SOUTH_ASIAN_KEYWORDS = (
+    "south asian",
+    "indian",
+    "punjabi",
+    "tamil",
+    "bengali",
+    "sri lankan",
+    "nepali",
+    "pakistani",
+    "gujarati",
+    "bangladeshi",
+    "malayali",
+    "kashmiri",
+)
+# NOTE(review): "olive" is a skin-tone word rather than a heritage; it matches
+# any entry whose skin field contains "olive" - confirm that is intended.
+MEDITERRANEAN_MENA_KEYWORDS = (
+    "mediterranean",
+    "greek",
+    "italian",
+    "spanish",
+    "portuguese",
+    "turkish",
+    "persian",
+    "levantine",
+    "maghrebi",
+    "egyptian",
+    "moroccan",
+    "amazigh",
+    "kurdish",
+    "middle-eastern",
+    "middle eastern",
+    "mena",
+    "olive",
+)
+LATINA_KEYWORDS = (
+    "latina",
+    "latino",
+    "mexican",
+    "chicana",
+    "colombian",
+    "brazilian",
+    "puerto rican",
+    "cuban",
+    "dominican",
+    "venezuelan",
+    "peruvian",
+    "chilean",
+    "argentine",
+    "uruguayan",
+    "ecuadorian",
+)
+# "african-diaspora" already contains "african", so under substring matching
+# it adds no matches of its own.
+BLACK_AFRICAN_KEYWORDS = ("african", "african-diaspora", "cape verdean")
+# "amazigh" is listed here and in MEDITERRANEAN_MENA_KEYWORDS, so entries
+# mentioning it match both groups.
+INDIGENOUS_KEYWORDS = ("indigenous", "amazigh")
+MIXED_KEYWORDS = ("mixed",)
+# Maps each group name to its keyword tuple. The keys are offered as
+# --ethnicity choices and are looked up per token by by_ethnicity().
+ETHNICITY_KEYWORD_GROUPS = {
+    "asian": ASIAN_KEYWORDS,
+    "white_asian": WHITE_KEYWORDS + ASIAN_KEYWORDS,
+    "european": WHITE_KEYWORDS,
+    "mediterranean_mena": MEDITERRANEAN_MENA_KEYWORDS,
+    "latina": LATINA_KEYWORDS,
+    "east_asian": EAST_ASIAN_KEYWORDS,
+    "southeast_asian": SOUTHEAST_ASIAN_KEYWORDS,
+    "south_asian": SOUTH_ASIAN_KEYWORDS,
+    "black_african": BLACK_AFRICAN_KEYWORDS,
+    "indigenous": INDIGENOUS_KEYWORDS,
+    "mixed": MIXED_KEYWORDS,
+}


 def by_ethnicity(pool: list, ethnicity: str) -> list:
    """Filter an appearance pool by heritage keywords found in the skin field.
    'asian' = East/Southeast/South/Central Asian; 'white_asian' = white/European + Asian;
-    'any' returns the full pool."""
+    'any' (or an empty/None value) returns the full pool.
+
+    The value is lower-cased and split on ',', '+', '|', ';', '/' and
+    whitespace into tokens, so several groups can be combined
+    (e.g. "east_asian+latina"). Each token is one of:
+      * a key of ETHNICITY_KEYWORD_GROUPS, expanded to that group's keywords;
+      * 'exclude_<group-or-keyword>', 'not_mixed' or 'no_mixed', which drops
+        entries whose skin field contains any of those keywords;
+      * anything else, used as a literal keyword.
+    Because whitespace separates tokens, a multi-word literal is matched word
+    by word. Matching is a case-insensitive substring test on entry[3].
+
+    A filter made only of exclusions starts from the whole pool. If nothing
+    survives the filter, the unfiltered pool is returned instead of an empty
+    list (presumably so callers can still pick an entry).
+    """
+    ethnicity = str(ethnicity or "any").strip().lower()
    if ethnicity == "any":
        return pool
-    if ethnicity == "asian":
-        kws = ASIAN_KEYWORDS
-    elif ethnicity == "white_asian":
-        kws = WHITE_KEYWORDS + ASIAN_KEYWORDS
-    else:
-        kws = (ethnicity,)
+    # The split pattern already consumes whitespace, so tokens need no strip().
+    tokens = [token for token in re.split(r"[,+|;/\s]+", ethnicity) if token]
+    kws: list[str] = []
+    exclude_kws: list[str] = []
+    for token in tokens:
+        if token.startswith("exclude_"):
+            exclude_key = token.removeprefix("exclude_")
+            # A bare "exclude_" would add "" as a keyword; "" is a substring of
+            # every skin field, so it would exclude the entire pool.
+            if exclude_key:
+                exclude_kws.extend(ETHNICITY_KEYWORD_GROUPS.get(exclude_key, (exclude_key,)))
+            continue
+        if token in ("not_mixed", "no_mixed"):
+            exclude_kws.extend(MIXED_KEYWORDS)
+            continue
+        kws.extend(ETHNICITY_KEYWORD_GROUPS.get(token, (token,)))
-    filtered = [e for e in pool if any(k in e[3].lower() for k in kws)]
+    if kws:
+        filtered = [e for e in pool if any(k in e[3].lower() for k in kws)]
+    else:
+        # Exclusion-only filter: start from everything; otherwise the empty
+        # include list matches nothing and the exclusions are silently ignored.
+        filtered = pool
+    if exclude_kws:
+        filtered = [e for e in filtered if not any(k in e[3].lower() for k in exclude_kws)]
    return filtered or pool


@@ -2894,10 +2995,7 @@ def make_single(index: int, batch: int, rng: random.Random, gender: str, expr_de
        clothes = choose(rng, WOMEN_CLOTHES_MINIMAL if minimal else WOMEN_CLOTHES)
        figure_note = choose(rng, figure_pool(figure))
    else:
-        # The ethnicity bias targets women; men stay any-heritage unless the
-        # batch is a fully-themed 'asian' batch.
-        men_eth = ethnicity if ethnicity == "asian" else "any"
-        men_pool = by_ethnicity(MEN, men_eth)
+        men_pool = by_ethnicity(MEN, ethnicity)
        subject, age, body, skin, hair, eyes = choose(rng, men_pool)
        clothes = choose(rng, MEN_CLOTHES_MINIMAL if minimal else MEN_CLOTHES)
        figure_note = ""
@@ -3247,11 +3345,10 @@ def main() -> None:
    )
    parser.add_argument(
        "--ethnicity",
-        choices=["any", "asian", "white_asian"],
+        choices=["any", *ETHNICITY_KEYWORD_GROUPS.keys()],
        default="any",
-        help="'any' = balanced heritage mix (default); "
-        "'asian' = restrict all subjects to Asian (East/Southeast/South/Central Asian); "
-        "'white_asian' = bias women to white/European + Asian (men stay any heritage).",
+        help="'any' = balanced heritage mix (default). Other values restrict the appearance pool "
+        "by heritage keywords, e.g. east_asian, latina, black_african, mixed, asian, or white_asian.",
    )
    parser.add_argument(
        "--poses",