Add Krea2 evaluation loop
This commit is contained in:
Executable
+228
@@ -0,0 +1,228 @@
|
||||
#!/usr/bin/env bash
# sxcp_eval_loop: periodically writes a structured Krea2 evaluation request,
# prints it, and optionally pipes it to an external command.
#
# Strict mode: stop on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
|
||||
|
||||
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout
# Returns:   status of cat; does not exit, the caller decides what to do next
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, nothing is expanded.
  cat <<'EOF'
Usage:
  tools/sxcp_eval_loop.sh [minutes] [options]

Loop protocol for Krea2 prompt-generator tuning. Start it right after sending a
prompt to sxcp_eval_out. Every N minutes it writes a structured evaluation
request, prints it, and optionally pipes it to a command. Each cycle should
produce either a prompt-only A/B edit, a generator fix, or a prompt-guide rule.

Options:
  -m, --minutes N      Wait N minutes between evaluation requests. Default: 5.
  -i, --in CHANNEL     Graph-to-agent channel. Default: sxcp_eval_in.
  -o, --out CHANNEL    Agent-to-graph prompt-only channel. Default: sxcp_eval_out.
  -l, --log CHANNEL    Analysis/log channel name. Default: sxcp_eval_log.
  -g, --guide FILE     Durable Krea2 prompt guide. Default: docs/krea2-prompt-guide.md.
  -d, --dir DIR        Runtime log directory. Default: .sxcp_eval.
  --once               Run one wait/check cycle and exit.
  -h, --help           Show this help.

Environment defaults (overridden by the matching argument or option):
  SXCP_EVAL_MINUTES, SXCP_EVAL_IN_CHANNEL, SXCP_EVAL_OUT_CHANNEL,
  SXCP_EVAL_LOG_CHANNEL, SXCP_EVAL_GUIDE_FILE, SXCP_EVAL_LOG_DIR.

Optional automation:
  SXCP_EVAL_CODEX_CMD  If set, the request is piped to this command.
                       Example: SXCP_EVAL_CODEX_CMD="codex exec"

The command receives the request on stdin and these environment variables:
  SXCP_EVAL_IN_CHANNEL, SXCP_EVAL_OUT_CHANNEL, SXCP_EVAL_LOG_CHANNEL,
  SXCP_EVAL_GUIDE_FILE, SXCP_EVAL_REQUEST_FILE, SXCP_EVAL_CYCLE_DIR,
  SXCP_EVAL_CYCLE.
EOF
}
|
||||
|
||||
#######################################
# Abort the script with a prefixed diagnostic.
# Arguments: $* - message words, joined with spaces
# Outputs:   "sxcp_eval_loop: <message>" on stderr
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf 'sxcp_eval_loop: %s\n' "$*" >&2
  exit 1
}
|
||||
|
||||
#######################################
# Report whether a string is a strictly positive decimal number.
# Accepts digits with at most one decimal point ("5", "0.5", ".5", "5.").
# Arguments: $1 - candidate string (treated as empty when omitted)
# Returns:   0 when $1 is a positive decimal number, 1 otherwise
#######################################
is_positive_number() {
  local candidate="${1:-}"

  case "$candidate" in
    # Empty, any character other than a digit or dot, or more than one dot.
    '' | *[!0-9.]* | *.*.*) return 1 ;;
  esac

  # A value greater than zero needs at least one non-zero digit. This also
  # rejects a lone "." and every spelling of zero ("0", "00", "0.000", ".0").
  case "$candidate" in
    *[1-9]*) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# Settings start from the environment and fall back to built-in defaults.
# The interval is left empty here when SXCP_EVAL_MINUTES is unset or empty.
run_once=0
minutes="${SXCP_EVAL_MINUTES:-}"
log_root="${SXCP_EVAL_LOG_DIR:-.sxcp_eval}"
guide_file="${SXCP_EVAL_GUIDE_FILE:-docs/krea2-prompt-guide.md}"
in_channel="${SXCP_EVAL_IN_CHANNEL:-sxcp_eval_in}"
out_channel="${SXCP_EVAL_OUT_CHANNEL:-sxcp_eval_out}"
log_channel="${SXCP_EVAL_LOG_CHANNEL:-sxcp_eval_log}"

# A leading argument that is non-empty and does not begin with "-" is taken
# as the interval in minutes and removed from the argument list.
case "${1:-}" in
  '' | -*) ;;
  *)
    minutes="$1"
    shift
    ;;
esac
|
||||
|
||||
# Consume the remaining arguments. Value-taking flags need a following word;
# anything unrecognized aborts through die.
while (( $# > 0 )); do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    --once)
      run_once=1
      shift
      continue
      ;;
    -m | --minutes | -i | --in | -o | --out | -l | --log | -g | --guide | -d | --dir)
      (( $# >= 2 )) || die "$1 requires a value"
      ;;
    *)
      die "unknown argument: $1"
      ;;
  esac

  # Only value-taking flags reach this point, and $2 is known to exist.
  case "$1" in
    -m | --minutes) minutes="$2" ;;
    -i | --in) in_channel="$2" ;;
    -o | --out) out_channel="$2" ;;
    -l | --log) log_channel="$2" ;;
    -g | --guide) guide_file="$2" ;;
    -d | --dir) log_root="$2" ;;
  esac
  shift 2
done
|
||||
|
||||
# Use a five-minute interval when no source supplied one, then validate it.
: "${minutes:=5}"
if ! is_positive_number "$minutes"; then
  die "minutes must be a positive number"
fi

# Each invocation logs into its own directory named after the UTC start time.
mkdir -p "$log_root"
run_id="$(date -u +%Y%m%dT%H%M%SZ)"
run_dir="${log_root}/${run_id}"
mkdir -p "$run_dir"
summary_file="${run_dir}/summary.md"
events_file="${run_dir}/events.tsv"
||||
|
||||
# Seed the run summary with the configuration, goal, and protocol. The
# delimiter is unquoted so the variables expand; backticks are escaped so they
# stay literal Markdown code spans instead of command substitutions.
cat <<EOF > "$summary_file"
# SxCP Eval Loop ${run_id}

- Interval: ${minutes} minute(s)
- Input channel: \`${in_channel}\`
- Prompt output channel: \`${out_channel}\`
- Log channel: \`${log_channel}\`
- Krea2 prompt guide: \`${guide_file}\`

## Goal

Tune the SxCP generator so its default Krea2 prompts produce the strongest
possible images for the selected scene, camera, subject, outfit, action, and
style. Every cycle should turn visual evidence into one of:

- a prompt-only A/B edit,
- a durable rule for \`${guide_file}\`,
- a generator code/data change with focused test coverage.

## Protocol

1. Pull the latest prompt/image from \`${in_channel}\`.
2. Compare the image against the prompt and previous edited prompt.
3. Identify concrete Krea2 mismatches and likely generator path.
4. Classify the next step: prompt-only edit, guide rule, or generator patch.
5. Push only the next test prompt to \`${out_channel}\`.
6. Keep analysis in chat or \`${log_channel}\`, not in \`${out_channel}\`.
7. Edit generator code/data only when the issue is systemic.
8. Update \`${guide_file}\` when a wording rule is confirmed.
9. Run focused smoke tests after generator edits.

## Cycles

EOF

# Start the tab-separated event log with its column header row.
printf '%s\t%s\t%s\t%s\n' cycle utc_time request_file status > "$events_file"
|
||||
|
||||
# Main evaluation loop.
#
# Each cycle waits for the configured interval, writes a request file under a
# per-cycle directory, records the cycle in summary.md and events.tsv, prints
# the request, and pipes it to SXCP_EVAL_CODEX_CMD when that variable is set.
# The loop runs until interrupted unless --once was given.

# Convert the interval to seconds once, outside the loop. POSIX sleep only
# specifies an integer number of seconds; the "m" unit suffix is an extension
# that not every sleep implementation provides. Whole-second results are
# printed without a fraction so integer-only implementations keep working.
sleep_seconds="$(awk -v m="$minutes" 'BEGIN {
  s = m * 60
  if (s == int(s)) printf "%d", s
  else printf "%.3f", s
}')"

cycle=0
while :; do
  cycle=$((cycle + 1))
  echo "sxcp_eval_loop: cycle $cycle waiting ${minutes} minute(s) before requesting evaluation..."
  sleep "$sleep_seconds"

  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  cycle_dir="$run_dir/cycle_$(printf '%03d' "$cycle")"
  mkdir -p "$cycle_dir"
  request_file="$cycle_dir/request.md"

  # Unquoted delimiter: channel names, paths, and run metadata are expanded.
  cat > "$request_file" <<EOF
Please run SxCP eval cycle $cycle now.

Primary goal:
- Tune the generator for better Krea2 images, not just one isolated image.
- Maintain/update the durable Krea2 prompt guide at: $guide_file

Channels:
- Pull latest graph output from: $in_channel
- Push prompt-only replacement to: $out_channel
- Put analysis/log text in chat or: $log_channel

Evaluation steps:
1. Pull the latest payload from $in_channel.
2. Inspect image_path and compare it to the prompt text.
3. Score these Krea2 axes: identity, outfit continuity, pose/action, camera compliance, location coherence, crop/framing, prompt noise, model confusion tokens, and overall image usefulness.
4. Identify the smallest concrete mismatch that should be tested next.
5. Classify the finding:
- prompt-only: push exactly one edited prompt to $out_channel and nothing else on that channel.
- guide-rule: update $guide_file with the confirmed Krea2 wording rule.
- generator-fix: edit the responsible generator path, add/adjust focused smoke coverage, run tests, and summarize the change.
6. Keep a clear link between the image evidence, the prompt wording, and the generator path.
7. Append the finding to the eval log with: original issue, changed wording/path, expected improvement, test result, guide update, generator update, and next hypothesis.

Current run:
- run_id: $run_id
- cycle: $cycle
- generated_at_utc: $stamp
- request_file: $request_file
- guide_file: $guide_file
EOF

  # Record the cycle as pending in both the Markdown summary and the TSV log.
  {
    echo
    echo "### Cycle $cycle - $stamp"
    echo
    echo "- Request: \`$request_file\`"
    echo "- Status: pending evaluation"
  } >> "$summary_file"
  printf '%s\t%s\t%s\t%s\n' "$cycle" "$stamp" "$request_file" "pending" >> "$events_file"

  echo
  echo "================ SxCP Eval Request ================"
  cat "$request_file"
  echo "==================================================="
  echo

  if [ "${SXCP_EVAL_CODEX_CMD:-}" != "" ]; then
    echo "sxcp_eval_loop: piping request to SXCP_EVAL_CODEX_CMD"
    # Capture the status explicitly so a failure is logged before the script
    # stops, instead of set -e ending the run with the cycle still "pending".
    cmd_status=0
    SXCP_EVAL_IN_CHANNEL="$in_channel" \
      SXCP_EVAL_OUT_CHANNEL="$out_channel" \
      SXCP_EVAL_LOG_CHANNEL="$log_channel" \
      SXCP_EVAL_GUIDE_FILE="$guide_file" \
      SXCP_EVAL_REQUEST_FILE="$request_file" \
      SXCP_EVAL_CYCLE_DIR="$cycle_dir" \
      SXCP_EVAL_CYCLE="$cycle" \
      sh -c "$SXCP_EVAL_CODEX_CMD" < "$request_file" || cmd_status=$?

    if [ "$cmd_status" -ne 0 ]; then
      printf '%s\t%s\t%s\t%s\n' \
        "$cycle" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$request_file" \
        "command_failed:$cmd_status" >> "$events_file"
      echo "- Status: SXCP_EVAL_CODEX_CMD failed with exit status $cmd_status" >> "$summary_file"
      echo "sxcp_eval_loop: SXCP_EVAL_CODEX_CMD exited with status $cmd_status; log written to $run_dir" >&2
      # Exit with the command's own status, as set -e did before.
      exit "$cmd_status"
    fi
  fi

  if [ "$run_once" -eq 1 ]; then
    break
  fi
done

echo "sxcp_eval_loop: log written to $run_dir"
|
||||
Reference in New Issue
Block a user