#!/usr/bin/env bash
# sxcp_eval_loop.sh - periodically write and print a structured SxCP
# evaluation request, optionally piping it to an external command.
# Run with --help (see usage below) for options and environment variables.

# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail

#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # Quoted delimiter: the help text is emitted literally, with no expansion.
  cat <<'EOF'
Usage:
  tools/sxcp_eval_loop.sh [minutes] [options]

Loop protocol for Krea2 prompt-generator tuning. Start it right after sending a
prompt to sxcp_eval_out. Every N minutes it writes a structured evaluation
request, prints it, and optionally pipes it to a command. Each cycle should
produce either a prompt-only A/B edit, a generator fix, or a prompt-guide rule.

Options:
  -m, --minutes N       Wait N minutes between evaluation requests.
  -i, --in CHANNEL      Graph-to-agent channel. Default: sxcp_eval_in.
  -o, --out CHANNEL     Agent-to-graph prompt-only channel. Default: sxcp_eval_out.
  -l, --log CHANNEL     Analysis/log channel name. Default: sxcp_eval_log.
  -g, --guide FILE      Durable Krea2 prompt guide. Default: docs/krea2-prompt-guide.md.
  -d, --dir DIR         Runtime log directory. Default: .sxcp_eval.
  --once                Run one wait/check cycle and exit.
  -h, --help            Show this help.

Optional automation:
  SXCP_EVAL_CODEX_CMD   If set, the request is piped to this command.
                        Example: SXCP_EVAL_CODEX_CMD="codex exec"

The command receives the request on stdin and these environment variables:
  SXCP_EVAL_IN_CHANNEL, SXCP_EVAL_OUT_CHANNEL, SXCP_EVAL_LOG_CHANNEL,
  SXCP_EVAL_GUIDE_FILE, SXCP_EVAL_REQUEST_FILE, SXCP_EVAL_CYCLE_DIR,
  SXCP_EVAL_CYCLE.
EOF
}

#######################################
# Report a fatal error and terminate the script.
# Arguments: $* - message words, joined with spaces
# Outputs:   "sxcp_eval_loop: <message>" on stderr
# Returns:   never; exits with status 1
#######################################
die() {
  printf 'sxcp_eval_loop: %s\n' "$*" >&2
  exit 1
}

#######################################
# Succeed when the argument is a strictly positive decimal number.
# Accepted shapes: digits with at most one decimal point, e.g. 5, 0.5, .5, 5.
# Rejected: empty input, signs, unit suffixes, more than one dot, a lone ".",
# and every spelling of zero (0, 00, 0.0, 0.000, ...). A zero interval must be
# rejected because the caller sleeps for this many minutes between cycles.
# Arguments: $1 - candidate value (may be missing)
# Returns:   0 if positive number, 1 otherwise
#######################################
is_positive_number() {
  local value="${1:-}"
  # Kept in a variable so the regex is not subject to shell quoting rules.
  local shape='^([0-9]+\.?[0-9]*|\.[0-9]+)$'

  [[ "$value" =~ $shape ]] || return 1
  # Well-formed; it is positive only if some digit is non-zero.
  [[ "$value" == *[1-9]* ]]
}

# Settings. Environment variables supply the initial values; command-line
# arguments parsed further down override them.
minutes="${SXCP_EVAL_MINUTES:-}" # left empty here; defaulted after parsing
in_channel="${SXCP_EVAL_IN_CHANNEL:-sxcp_eval_in}"
out_channel="${SXCP_EVAL_OUT_CHANNEL:-sxcp_eval_out}"
log_channel="${SXCP_EVAL_LOG_CHANNEL:-sxcp_eval_log}"
guide_file="${SXCP_EVAL_GUIDE_FILE:-docs/krea2-prompt-guide.md}"
log_root="${SXCP_EVAL_LOG_DIR:-.sxcp_eval}"
run_once=0 # set to 1 by --once

# A leading argument that does not start with "-" is the positional [minutes].
if [[ -n "${1:-}" && "${1:-}" != -* ]]; then
  minutes="$1"
  shift
fi

# Everything left must be an option. Value-taking options consume two words
# and fail with a clear message when the value is missing.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -m | --minutes)
      [ "$#" -ge 2 ] || die "$1 requires a value"
      minutes="$2"
      shift 2
      ;;
    -i | --in)
      [ "$#" -ge 2 ] || die "$1 requires a value"
      in_channel="$2"
      shift 2
      ;;
    -o | --out)
      [ "$#" -ge 2 ] || die "$1 requires a value"
      out_channel="$2"
      shift 2
      ;;
    -l | --log)
      [ "$#" -ge 2 ] || die "$1 requires a value"
      log_channel="$2"
      shift 2
      ;;
    -g | --guide)
      [ "$#" -ge 2 ] || die "$1 requires a value"
      guide_file="$2"
      shift 2
      ;;
    -d | --dir)
      [ "$#" -ge 2 ] || die "$1 requires a value"
      log_root="$2"
      shift 2
      ;;
    --once)
      run_once=1
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      die "unknown argument: $1"
      ;;
  esac
done

# Use 5 minutes when neither the environment nor the command line gave a
# value, then refuse anything that is not a positive number.
minutes="${minutes:-5}"
is_positive_number "$minutes" || die "minutes must be a positive number"

# Each invocation logs into its own directory named after the UTC start time.
# "--" keeps a log directory that begins with "-" from being read as an option.
mkdir -p -- "$log_root"
run_id="$(date -u +%Y%m%dT%H%M%SZ)"
run_dir="$log_root/$run_id"
mkdir -p -- "$run_dir"
events_file="$run_dir/events.tsv"
summary_file="$run_dir/summary.md"

# Write the run summary header. The heredoc delimiter is unquoted so the
# settings are expanded; backticks are escaped to stay literal Markdown.
cat > "$summary_file" <<EOF
# SxCP Eval Loop $run_id

- Interval: ${minutes} minute(s)
- Input channel: \`$in_channel\`
- Prompt output channel: \`$out_channel\`
- Log channel: \`$log_channel\`
- Krea2 prompt guide: \`$guide_file\`

## Goal

Tune the SxCP generator so its default Krea2 prompts produce the strongest
possible images for the selected scene, camera, subject, outfit, action, and
style. Every cycle should turn visual evidence into one of:

- a prompt-only A/B edit,
- a durable rule for \`$guide_file\`,
- a generator code/data change with focused test coverage.

## Protocol

1. Pull the latest prompt/image from \`$in_channel\`.
2. Compare the image against the prompt and previous edited prompt.
3. Identify concrete Krea2 mismatches and likely generator path.
4. Classify the next step: prompt-only edit, guide rule, or generator patch.
5. Push only the next test prompt to \`$out_channel\`.
6. Keep analysis in chat or \`$log_channel\`, not in \`$out_channel\`.
7. Edit generator code/data only when the issue is systemic.
8. Update \`$guide_file\` when a wording rule is confirmed.
9. Run focused smoke tests after generator edits.

## Cycles

EOF

# Start the tab-separated event log with its column header.
printf 'cycle\tutc_time\trequest_file\tstatus\n' > "$events_file"

# Main loop: wait, write a request file for the cycle, record it in the
# summary and event log, print it, and optionally pipe it to
# SXCP_EVAL_CODEX_CMD. Runs until interrupted unless --once was given.
cycle=0
while :; do
  cycle=$((cycle + 1))
  echo "sxcp_eval_loop: cycle $cycle waiting ${minutes} minute(s) before requesting evaluation..."
  # NOTE(review): the "m" (minutes) suffix is understood by GNU sleep; confirm
  # it is supported on every platform this script targets.
  sleep "${minutes}m"

  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  cycle_dir="$run_dir/cycle_$(printf '%03d' "$cycle")"
  mkdir -p -- "$cycle_dir"
  request_file="$cycle_dir/request.md"

  cat > "$request_file" <<EOF
Please run SxCP eval cycle $cycle now.

Primary goal:
- Tune the generator for better Krea2 images, not just one isolated image.
- Maintain/update the durable Krea2 prompt guide at: $guide_file

Channels:
- Pull latest graph output from: $in_channel
- Push prompt-only replacement to: $out_channel
- Put analysis/log text in chat or: $log_channel

Evaluation steps:
1. Pull the latest payload from $in_channel.
2. Inspect image_path and compare it to the prompt text.
3. Score these Krea2 axes: identity, outfit continuity, pose/action, camera compliance, location coherence, crop/framing, prompt noise, model confusion tokens, and overall image usefulness.
4. Identify the smallest concrete mismatch that should be tested next.
5. Classify the finding:
   - prompt-only: push exactly one edited prompt to $out_channel and nothing else on that channel.
   - guide-rule: update $guide_file with the confirmed Krea2 wording rule.
   - generator-fix: edit the responsible generator path, add/adjust focused smoke coverage, run tests, and summarize the change.
6. Keep a clear link between the image evidence, the prompt wording, and the generator path.
7. Append the finding to the eval log with: original issue, changed wording/path, expected improvement, test result, guide update, generator update, and next hypothesis.

Current run:
- run_id: $run_id
- cycle: $cycle
- generated_at_utc: $stamp
- request_file: $request_file
- guide_file: $guide_file
EOF

  # Record the pending cycle in both the Markdown summary and the TSV log.
  {
    printf '\n### Cycle %s - %s\n\n' "$cycle" "$stamp"
    printf -- '- Request: `%s`\n' "$request_file"
    printf -- '- Status: pending evaluation\n'
  } >> "$summary_file"
  printf '%s\t%s\t%s\t%s\n' "$cycle" "$stamp" "$request_file" "pending" >> "$events_file"

  echo
  echo "================ SxCP Eval Request ================"
  cat "$request_file"
  echo "==================================================="
  echo

  if [ "${SXCP_EVAL_CODEX_CMD:-}" != "" ]; then
    echo "sxcp_eval_loop: piping request to SXCP_EVAL_CODEX_CMD"
    # The command string is run through "sh -c" with the request on stdin and
    # the cycle context exported only for that command. Its status is captured
    # so a failure is reported instead of aborting silently under "set -e".
    cmd_status=0
    SXCP_EVAL_IN_CHANNEL="$in_channel" \
      SXCP_EVAL_OUT_CHANNEL="$out_channel" \
      SXCP_EVAL_LOG_CHANNEL="$log_channel" \
      SXCP_EVAL_GUIDE_FILE="$guide_file" \
      SXCP_EVAL_REQUEST_FILE="$request_file" \
      SXCP_EVAL_CYCLE_DIR="$cycle_dir" \
      SXCP_EVAL_CYCLE="$cycle" \
      sh -c "$SXCP_EVAL_CODEX_CMD" < "$request_file" || cmd_status=$?
    if [ "$cmd_status" -ne 0 ]; then
      echo "sxcp_eval_loop: SXCP_EVAL_CODEX_CMD exited with status $cmd_status (cycle $cycle)" >&2
      exit "$cmd_status"
    fi
  fi

  if [ "$run_once" -eq 1 ]; then
    break
  fi
done

echo "sxcp_eval_loop: log written to $run_dir"