ComfyUI-Ethanfel-Prompt-Bui…/tools/sxcp_eval_loop.sh

#!/usr/bin/env bash
set -euo pipefail

# Print the command-line help text on stdout.
# The heredoc delimiter is quoted, so the text below is emitted literally:
# nothing inside it is subject to parameter or command expansion.
usage() {
  cat <<'USAGE_TEXT'
Usage:
  tools/sxcp_eval_loop.sh [minutes] [options]

Loop protocol for Krea2 prompt-generator tuning. Start it right after sending a
prompt to sxcp_eval_out. Every N minutes it writes a structured evaluation
request, prints it, and optionally pipes it to a command. Each cycle should
produce either a prompt-only A/B edit, a generator fix, or a prompt-guide rule.

Options:
  -m, --minutes N       Wait N minutes between evaluation requests.
  -i, --in CHANNEL      Graph-to-agent channel. Default: sxcp_eval_in.
  -o, --out CHANNEL     Agent-to-graph prompt-only channel. Default: sxcp_eval_out.
  -l, --log CHANNEL     Analysis/log channel name. Default: sxcp_eval_log.
  -g, --guide FILE      Durable Krea2 prompt guide. Default: docs/krea2-prompt-guide.md.
  -d, --dir DIR         Runtime log directory. Default: .sxcp_eval.
  --once                Run one wait/check cycle and exit.
  -h, --help            Show this help.

Optional automation:
  SXCP_EVAL_CODEX_CMD   If set, the request is piped to this command.
                        Example: SXCP_EVAL_CODEX_CMD="codex exec"

The command receives the request on stdin and these environment variables:
  SXCP_EVAL_IN_CHANNEL, SXCP_EVAL_OUT_CHANNEL, SXCP_EVAL_LOG_CHANNEL,
  SXCP_EVAL_GUIDE_FILE, SXCP_EVAL_REQUEST_FILE, SXCP_EVAL_CYCLE_DIR,
  SXCP_EVAL_CYCLE.
USAGE_TEXT
}

# Report a fatal error and terminate the script.
# Arguments: every argument is joined into one message via "$*" (separated by
#            the first character of IFS, a space by default).
# Outputs:   "sxcp_eval_loop: <message>" on stderr; nothing on stdout.
# Exits:     always, with status 1.
die() {
  printf 'sxcp_eval_loop: %s\n' "$*" >&2
  exit 1
}

# Check that a string is a plain decimal number strictly greater than zero.
# Arguments: $1 - candidate value (may be unset or empty)
# Returns:   0 when $1 contains only digits and at most one decimal point and
#            has at least one non-zero digit; 1 otherwise.
is_positive_number() {
  local candidate="${1:-}"

  case "$candidate" in
    # Empty, any character that is not a digit or '.', or more than one '.'.
    # The leading '*' matters: without it only strings that START with a dot
    # are caught, so values such as 1.2.3 would be accepted.
    '' | *[!0-9.]* | *.*.*) return 1 ;;
  esac

  # Requiring a non-zero digit rejects every spelling of zero (0, 00, 0.000,
  # 0., .0) as well as a lone "." that contains no digits at all.
  case "$candidate" in
    *[1-9]*) return 0 ;;
    *) return 1 ;;
  esac
}

# Runtime settings. Each SXCP_EVAL_* environment variable, when set and
# non-empty, replaces the built-in default shown here. The interval has no
# default at this point; it stays empty unless SXCP_EVAL_MINUTES provides one.
run_once=0
minutes="${SXCP_EVAL_MINUTES:-}"
log_root="${SXCP_EVAL_LOG_DIR:-.sxcp_eval}"
guide_file="${SXCP_EVAL_GUIDE_FILE:-docs/krea2-prompt-guide.md}"
in_channel="${SXCP_EVAL_IN_CHANNEL:-sxcp_eval_in}"
out_channel="${SXCP_EVAL_OUT_CHANNEL:-sxcp_eval_out}"
log_channel="${SXCP_EVAL_LOG_CHANNEL:-sxcp_eval_log}"

# A first argument that is non-empty and does not begin with '-' is taken as
# the interval in minutes and removed from the argument list.
case "${1:-}" in
  '' | -*) ;;
  *)
    minutes="$1"
    shift
    ;;
esac

# Parse the remaining command-line options. The first case statement handles
# flags that take no value and verifies that value-taking options are followed
# by another argument; the second stores that value, after which the option
# and its value are consumed together.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    --once)
      run_once=1
      shift
      continue
      ;;
    -m | --minutes | -i | --in | -o | --out | -l | --log | -g | --guide | -d | --dir)
      if [ "$#" -lt 2 ]; then
        die "$1 requires a value"
      fi
      ;;
    *)
      die "unknown argument: $1"
      ;;
  esac

  case "$1" in
    -m | --minutes) minutes="$2" ;;
    -i | --in) in_channel="$2" ;;
    -o | --out) out_channel="$2" ;;
    -l | --log) log_channel="$2" ;;
    -g | --guide) guide_file="$2" ;;
    -d | --dir) log_root="$2" ;;
  esac
  shift 2
done

# Use a 5-minute interval when no non-empty value has been supplied so far,
# then refuse anything that is not a positive number.
: "${minutes:=5}"
if ! is_positive_number "$minutes"; then
  die "minutes must be a positive number"
fi

# Create a per-run directory named after the current UTC time (second
# resolution) beneath the log root, and derive the paths of the two run-level
# files that the loop appends to.
mkdir -p "$log_root"
run_id="$(date -u +%Y%m%dT%H%M%SZ)"
run_dir="$log_root/$run_id"
events_file="$run_dir/events.tsv"
summary_file="$run_dir/summary.md"
mkdir -p "$run_dir"

# Summary header. The heredoc delimiter is unquoted so the settings expand;
# the backticks are escaped so they are written literally instead of starting
# a command substitution.
cat > "$summary_file" <<SUMMARY_HEADER
# SxCP Eval Loop $run_id

- Interval: ${minutes} minute(s)
- Input channel: \`$in_channel\`
- Prompt output channel: \`$out_channel\`
- Log channel: \`$log_channel\`
- Krea2 prompt guide: \`$guide_file\`

## Goal

Tune the SxCP generator so its default Krea2 prompts produce the strongest
possible images for the selected scene, camera, subject, outfit, action, and
style. Every cycle should turn visual evidence into one of:

- a prompt-only A/B edit,
- a durable rule for \`$guide_file\`,
- a generator code/data change with focused test coverage.

## Protocol

1. Pull the latest prompt/image from \`$in_channel\`.
2. Compare the image against the prompt and previous edited prompt.
3. Identify concrete Krea2 mismatches and likely generator path.
4. Classify the next step: prompt-only edit, guide rule, or generator patch.
5. Push only the next test prompt to \`$out_channel\`.
6. Keep analysis in chat or \`$log_channel\`, not in \`$out_channel\`.
7. Edit generator code/data only when the issue is systemic.
8. Update \`$guide_file\` when a wording rule is confirmed.
9. Run focused smoke tests after generator edits.

## Cycles

SUMMARY_HEADER

# Column header of the tab-separated event log; one row is appended per cycle.
printf '%s\t%s\t%s\t%s\n' cycle utc_time request_file status > "$events_file"

# Main loop. Each cycle waits for the configured interval, writes a request
# file into its own cycle directory, records the cycle in the summary and
# event log, prints the request, and, when SXCP_EVAL_CODEX_CMD is set, pipes
# the request to that command. The loop repeats until the script is
# interrupted, or stops after the first cycle when --once was given.
cycle=0
while :; do
  cycle=$((cycle + 1))
  echo "sxcp_eval_loop: cycle $cycle waiting ${minutes} minute(s) before requesting evaluation..."
  # NOTE: relies on a sleep implementation that accepts the "m" unit suffix
  # (GNU coreutils does); POSIX only specifies a whole number of seconds.
  sleep "${minutes}m"

  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  cycle_dir="$run_dir/cycle_$(printf '%03d' "$cycle")"
  mkdir -p "$cycle_dir"
  request_file="$cycle_dir/request.md"

  cat > "$request_file" <<EOF
Please run SxCP eval cycle $cycle now.

Primary goal:
- Tune the generator for better Krea2 images, not just one isolated image.
- Maintain/update the durable Krea2 prompt guide at: $guide_file

Channels:
- Pull latest graph output from: $in_channel
- Push prompt-only replacement to: $out_channel
- Put analysis/log text in chat or: $log_channel

Evaluation steps:
1. Pull the latest payload from $in_channel.
2. Inspect image_path and compare it to the prompt text.
3. Score these Krea2 axes: identity, outfit continuity, pose/action, camera compliance, location coherence, crop/framing, prompt noise, model confusion tokens, and overall image usefulness.
4. Identify the smallest concrete mismatch that should be tested next.
5. Classify the finding:
   - prompt-only: push exactly one edited prompt to $out_channel and nothing else on that channel.
   - guide-rule: update $guide_file with the confirmed Krea2 wording rule.
   - generator-fix: edit the responsible generator path, add/adjust focused smoke coverage, run tests, and summarize the change.
6. Keep a clear link between the image evidence, the prompt wording, and the generator path.
7. Append the finding to the eval log with: original issue, changed wording/path, expected improvement, test result, guide update, generator update, and next hypothesis.

Current run:
- run_id: $run_id
- cycle: $cycle
- generated_at_utc: $stamp
- request_file: $request_file
- guide_file: $guide_file
EOF

  {
    echo
    echo "### Cycle $cycle - $stamp"
    echo
    echo "- Request: \`$request_file\`"
    echo "- Status: pending evaluation"
  } >> "$summary_file"
  printf '%s\t%s\t%s\t%s\n' "$cycle" "$stamp" "$request_file" "pending" >> "$events_file"

  echo
  echo "================ SxCP Eval Request ================"
  cat "$request_file"
  echo "==================================================="
  echo

  if [ "${SXCP_EVAL_CODEX_CMD:-}" != "" ]; then
    echo "sxcp_eval_loop: piping request to SXCP_EVAL_CODEX_CMD"
    # Capture the exit status rather than letting `set -e` end the script
    # silently: a failure is then recorded and reported before exiting.
    cmd_status=0
    SXCP_EVAL_IN_CHANNEL="$in_channel" \
    SXCP_EVAL_OUT_CHANNEL="$out_channel" \
    SXCP_EVAL_LOG_CHANNEL="$log_channel" \
    SXCP_EVAL_GUIDE_FILE="$guide_file" \
    SXCP_EVAL_REQUEST_FILE="$request_file" \
    SXCP_EVAL_CYCLE_DIR="$cycle_dir" \
    SXCP_EVAL_CYCLE="$cycle" \
      sh -c "$SXCP_EVAL_CODEX_CMD" < "$request_file" || cmd_status=$?

    if [ "$cmd_status" -ne 0 ]; then
      echo "- Status: SXCP_EVAL_CODEX_CMD failed with exit status $cmd_status" >> "$summary_file"
      printf '%s\t%s\t%s\t%s\n' "$cycle" "$stamp" "$request_file" "command_failed:$cmd_status" >> "$events_file"
      echo "sxcp_eval_loop: SXCP_EVAL_CODEX_CMD failed with exit status $cmd_status in cycle $cycle; log written to $run_dir" >&2
      # Exit with the command's own status, as an unhandled failure under
      # `set -e` would.
      exit "$cmd_status"
    fi
  fi

  if [ "$run_once" -eq 1 ]; then
    break
  fi
done

echo "sxcp_eval_loop: log written to $run_dir"