Fix noise level (900 not 1000) and prompt concatenation to match original STAR

The original STAR inference uses total_noise_levels=900, which preserves the input structure during SDEdit. We were using 1000, which starts from near-pure noise and destroys the input. Also, always append the quality prompt to the user's text instead of using it only as a fallback when the user prompt is blank.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:03:34 +01:00
parent 2bf8db4f07
commit 8a440761d1
3 changed files with 14 additions and 28 deletions
--- a/star_pipeline.py
+++ b/star_pipeline.py
@@ -265,7 +265,7 @@ def run_star_inference(
    if offload == "aggressive":
        vae_dec_chunk = 1

-    total_noise_levels = 1000
+    total_noise_levels = 900

    # -- Convert ComfyUI frames to STAR format --
    video_data = comfyui_to_star_frames(images)  # [F, 3, H, W]
@@ -291,7 +291,7 @@ def run_star_inference(
    if offload != "disabled":
        text_encoder.model.to(device)
        text_encoder.device = device
-    text = prompt if prompt.strip() else cfg.positive_prompt
+    text = (prompt if prompt.strip() else "") + cfg.positive_prompt
    y = text_encoder(text).detach()
    if offload != "disabled":
        text_encoder.model.to("cpu")