Extract formatter input parsing policy

This commit is contained in:
2026-06-27 01:22:07 +02:00
parent b54b8b9421
commit 4c45d96472
7 changed files with 239 additions and 159 deletions
+9 -51
View File
@@ -1,13 +1,14 @@
from __future__ import annotations
import json
import re
from typing import Any
try:
from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family
from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
@@ -95,74 +96,31 @@ def sdxl_quality_preset_choices() -> list[str]:
def _clean(value: Any) -> str:
    """Normalize *value* to single-line text via the shared input policy.

    Thin wrapper kept so existing call sites in this module stay unchanged
    after the parsing policy moved to ``formatter_input``.

    Before the extraction this helper mapped ``None`` to ``""``, stringified
    any other value, collapsed every whitespace run (newlines included) to a
    single space, stripped both ends, and removed whitespace that preceded
    ``,``, ``.``, ``;`` or ``:``.

    NOTE(review): ``input_policy.clean_text`` is expected to keep that
    contract -- confirm in ``formatter_input``.
    """
    return input_policy.clean_text(value)
def _maybe_json(text: str) -> dict[str, Any] | None:
    """Parse *text* as a JSON object, or return ``None`` if it is not one.

    Delegates to ``input_policy.maybe_json`` so the parsing rules live in a
    single module.

    Before the extraction this helper cleaned the text, returned ``None``
    unless it started with ``{``, returned ``None`` on
    ``json.JSONDecodeError``, and returned the decoded value only when it was
    a ``dict``.

    NOTE(review): the delegate is presumed to behave the same way -- verify
    against ``formatter_input``.
    """
    return input_policy.maybe_json(text)
def _row_from_inputs(source_text: str, metadata_json: str, input_hint: str) -> tuple[dict[str, Any] | None, str]:
    """Pick the structured row (if any) from the formatter inputs.

    Delegates to ``input_policy.row_from_inputs``.

    Returns a ``(row, method)`` pair. Before the extraction, when
    *input_hint* was ``"auto"`` or ``"metadata_json"`` this helper tried
    *metadata_json* first (method ``"metadata_json"``) and then
    *source_text* (method ``"source_json"``), returning the first input that
    parsed as a JSON object. In every other case it returned
    ``(None, "text")``.

    NOTE(review): the delegate is presumed to keep that precedence and those
    method labels -- confirm in ``formatter_input``.
    """
    return input_policy.row_from_inputs(source_text, metadata_json, input_hint)
def _strip_trigger(text: str, preserve_trigger: bool) -> str:
    """Remove a leading trigger phrase from *text* unless asked to keep it.

    Delegates to ``input_policy.strip_trigger_prefix``, passing this module's
    ``TRIGGER_CANDIDATES`` so the policy module stays free of module-specific
    constants.

    Before the extraction this helper cleaned the text and returned it as-is
    when *preserve_trigger* was true. Otherwise it dropped the first
    candidate that prefixed the text (case-insensitively) and was followed by
    ``,`` or ``.``, then stripped surrounding spaces and commas from the
    remainder.

    NOTE(review): the delegate is presumed to match that behaviour -- confirm
    in ``formatter_input``.
    """
    return input_policy.strip_trigger_prefix(
        text,
        TRIGGER_CANDIDATES,
        preserve_trigger=preserve_trigger,
    )
def _split_avoid(text: str) -> tuple[str, str]:
    """Split *text* into its main part and its trailing ``Avoid:`` clause.

    Delegates to ``input_policy.split_avoid``.

    Returns a ``(main, avoid)`` pair. Before the extraction this helper
    searched for ``\\bAvoid:\\s*(.*)$``. With no match it returned
    ``(text, "")``. Otherwise it returned the text before the match and the
    captured tail, each stripped of surrounding spaces and periods.

    NOTE(review): the delegate is presumed to match that behaviour -- confirm
    in ``formatter_input``.
    """
    return input_policy.split_avoid(text)
def _prompt_field(text: str, label: str) -> str:
    """Extract the value of the ``label:`` field from a labelled prompt.

    Delegates to ``input_policy.prompt_field``, passing this module's
    ``PROMPT_FIELD_LABELS`` as the set of labels that can end a field.

    Before the extraction this helper cleaned the text and captured what
    followed ``label:`` up to the next ``". <known label>:"``, ``". Use"``,
    ``". Avoid"`` or end of text. It returned the cleaned capture without
    trailing periods, or ``""`` when the text was empty or the label was
    absent.

    NOTE(review): the delegate is presumed to match that behaviour -- confirm
    in ``formatter_input``.
    """
    return input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS)
def _row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str:
    """Return the value for *key* from *row*, falling back to the prompt text.

    Delegates to ``input_policy.row_value``, passing this module's
    ``PROMPT_FIELD_LABELS``.

    Before the extraction this helper returned the cleaned ``row[key]`` when
    it was non-empty. Otherwise it tried each entry of *labels* in order as a
    labelled field inside the cleaned ``row["prompt"]`` and returned the
    first non-empty hit, or ``""`` when nothing matched.

    NOTE(review): the delegate is presumed to keep that lookup order --
    confirm in ``formatter_input``.
    """
    return input_policy.row_value(row, key, labels, field_labels=PROMPT_FIELD_LABELS)
def _split_tag_text(text: Any) -> list[str]: