Add detailed README with SVG diagrams and Apache 2.0 license

Includes algorithm comparison, node wiring, and parameter guide
diagrams. SVGs use <picture> tags for GitHub compatibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-04 17:15:07 +01:00
parent a79c5163a1
commit 3953d97163
6 changed files with 637 additions and 0 deletions

87
assets/algorithm.svg Normal file
View File

@@ -0,0 +1,87 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 880 520" role="img" aria-labelledby="algo-title algo-desc">
  <!-- Side-by-side algorithm comparison: Standard CFG (left panel) vs SMC-CFG (right panel). -->
  <title id="algo-title">Algorithm: Standard CFG vs SMC-CFG</title>
  <desc id="algo-desc">Left panel: Standard CFG computes v_out = v_uncond + w * e_t, a linear combination that can diverge at high CFG scales. Right panel: SMC-CFG computes a sliding surface s_t, adds the switching term u_sw = -K * sign(s_t) to the guidance error, and outputs v_out = v_uncond + w * e_corrected.</desc>
  <!-- All reusable definitions live in one block, declared before their first use. -->
  <defs>
    <linearGradient id="abg" x1="0%" y1="0%" x2="100%" y2="100%">
      <stop offset="0%" stop-color="#0d1117"/>
      <stop offset="100%" stop-color="#161b22"/>
    </linearGradient>
    <!-- Arrowhead for the connector between the two panels; refX=8 puts the tip on the path end. -->
    <marker id="arrowhead" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
      <polygon points="0 0, 8 3, 0 6" fill="#484f58"/>
    </marker>
  </defs>
  <!-- Canvas background -->
  <rect width="880" height="520" rx="12" fill="url(#abg)"/>
  <!-- Title -->
  <text x="440" y="38" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="20" font-weight="700" fill="#e6edf3" text-anchor="middle">Algorithm: Standard CFG vs SMC-CFG</text>

  <!-- Left panel: Standard CFG -->
  <rect x="30" y="60" width="390" height="430" rx="10" fill="#1c2333" stroke="#30363d" stroke-width="1.5"/>
  <text x="225" y="90" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="16" font-weight="600" fill="#f0883e" text-anchor="middle">Standard CFG (P-Control)</text>
  <!-- Step boxes: each is a 340x44 card with a step number, a caption and a formula line. -->
  <rect x="55" y="110" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="75" y="127" font-family="monospace" font-size="11" fill="#7ee787">1.</text>
  <text x="95" y="127" font-family="monospace" font-size="11" fill="#c9d1d9">Compute conditional prediction</text>
  <text x="95" y="145" font-family="monospace" font-size="12" font-weight="600" fill="#79c0ff">v_cond = model(x_t, t, prompt)</text>
  <rect x="55" y="164" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="75" y="181" font-family="monospace" font-size="11" fill="#7ee787">2.</text>
  <text x="95" y="181" font-family="monospace" font-size="11" fill="#c9d1d9">Compute unconditional prediction</text>
  <text x="95" y="199" font-family="monospace" font-size="12" font-weight="600" fill="#79c0ff">v_uncond = model(x_t, t, "")</text>
  <rect x="55" y="218" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="75" y="235" font-family="monospace" font-size="11" fill="#7ee787">3.</text>
  <text x="95" y="235" font-family="monospace" font-size="11" fill="#c9d1d9">Guidance error (fixed gain)</text>
  <text x="95" y="253" font-family="monospace" font-size="12" font-weight="600" fill="#79c0ff">e_t = v_cond - v_uncond</text>
  <!-- Step 4 is outlined in red to flag it as the problematic step. -->
  <rect x="55" y="272" width="340" height="44" rx="6" fill="#21262d" stroke="#f85149" stroke-width="1.5"/>
  <text x="75" y="289" font-family="monospace" font-size="11" fill="#7ee787">4.</text>
  <text x="95" y="289" font-family="monospace" font-size="11" fill="#f85149">Linear combination (can diverge!)</text>
  <text x="95" y="307" font-family="monospace" font-size="12" font-weight="600" fill="#ff7b72">v_out = v_uncond + w * e_t</text>
  <!-- Problem callout -->
  <rect x="55" y="340" width="340" height="70" rx="6" fill="#2d1117" stroke="#f85149" stroke-width="1"/>
  <text x="75" y="362" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" font-weight="600" fill="#f85149">Problem at high CFG scales:</text>
  <text x="75" y="382" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#f0883e">Unbounded linear gain causes oversaturation,</text>
  <text x="75" y="398" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#f0883e">artifacts, and semantic drift</text>

  <!-- Right panel: SMC-CFG -->
  <rect x="460" y="60" width="390" height="430" rx="10" fill="#1c2333" stroke="#30363d" stroke-width="1.5"/>
  <text x="655" y="90" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="16" font-weight="600" fill="#7ee787" text-anchor="middle">SMC-CFG (Sliding Mode Control)</text>
  <!-- Step boxes: same card layout as the left panel, shifted 430px to the right. -->
  <rect x="485" y="110" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="505" y="127" font-family="monospace" font-size="11" fill="#7ee787">1.</text>
  <text x="525" y="127" font-family="monospace" font-size="11" fill="#c9d1d9">Same: compute v_cond, v_uncond</text>
  <text x="525" y="145" font-family="monospace" font-size="12" font-weight="600" fill="#79c0ff">e_t = v_cond - v_uncond</text>
  <rect x="485" y="164" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="505" y="181" font-family="monospace" font-size="11" fill="#7ee787">2.</text>
  <text x="525" y="181" font-family="monospace" font-size="11" fill="#c9d1d9">Compute sliding surface</text>
  <text x="525" y="199" font-family="monospace" font-size="12" font-weight="600" fill="#d2a8ff">s_t = (e_t - e_{t-1}) + lambda * e_{t-1}</text>
  <rect x="485" y="218" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="505" y="235" font-family="monospace" font-size="11" fill="#7ee787">3.</text>
  <text x="525" y="235" font-family="monospace" font-size="11" fill="#c9d1d9">Nonlinear switching control</text>
  <text x="525" y="253" font-family="monospace" font-size="12" font-weight="600" fill="#d2a8ff">u_sw = -K * sign(s_t)</text>
  <rect x="485" y="272" width="340" height="44" rx="6" fill="#21262d" stroke="#30363d"/>
  <text x="505" y="289" font-family="monospace" font-size="11" fill="#7ee787">4.</text>
  <text x="525" y="289" font-family="monospace" font-size="11" fill="#c9d1d9">Bounded correction</text>
  <text x="525" y="307" font-family="monospace" font-size="12" font-weight="600" fill="#d2a8ff">e_corrected = e_t + u_sw</text>
  <!-- Step 5 is outlined in green to mark the final output. -->
  <rect x="485" y="326" width="340" height="44" rx="6" fill="#21262d" stroke="#7ee787" stroke-width="1.5"/>
  <text x="505" y="343" font-family="monospace" font-size="11" fill="#7ee787">5.</text>
  <text x="525" y="343" font-family="monospace" font-size="11" fill="#7ee787">Stable guided output</text>
  <text x="525" y="361" font-family="monospace" font-size="12" font-weight="600" fill="#7ee787">v_out = v_uncond + w * e_corrected</text>
  <!-- Solution callout -->
  <rect x="485" y="394" width="340" height="70" rx="6" fill="#0d2818" stroke="#238636" stroke-width="1"/>
  <text x="505" y="416" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" font-weight="600" fill="#7ee787">Correction bounded to [-K, +K]:</text>
  <text x="505" y="436" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#56d364">Prevents divergence at any CFG scale.</text>
  <text x="505" y="452" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#56d364">Lyapunov-stable finite-time convergence.</text>

  <!-- Arrow in the gap between the panels (x=420 to x=460), pointing from left to right. -->
  <path d="M 425 275 L 455 275" stroke="#484f58" stroke-width="2" fill="none" marker-end="url(#arrowhead)"/>
</svg>

After

Width:  |  Height:  |  Size: 6.4 KiB

18
assets/banner.svg Normal file
View File

@@ -0,0 +1,18 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 200" role="img" aria-labelledby="banner-title">
  <!-- Project banner: rounded gradient card, accent strip along the bottom edge, three centered text lines. -->
  <title id="banner-title">SMC-CFG Ctrl: Sliding Mode Control for Classifier-Free Guidance</title>
  <defs>
    <!-- Diagonal background gradient -->
    <linearGradient id="bg" x1="0%" y1="0%" x2="100%" y2="100%">
      <stop offset="0%" stop-color="#1a1a2e"/>
      <stop offset="50%" stop-color="#16213e"/>
      <stop offset="100%" stop-color="#0f3460"/>
    </linearGradient>
    <!-- Horizontal gradient for the accent strip -->
    <linearGradient id="accent" x1="0%" y1="0%" x2="100%" y2="0%">
      <stop offset="0%" stop-color="#e94560"/>
      <stop offset="100%" stop-color="#533483"/>
    </linearGradient>
    <!-- Outline of the rounded card, used to clip the accent strip to the card's bottom corners. -->
    <clipPath id="card">
      <rect width="900" height="200" rx="12"/>
    </clipPath>
  </defs>
  <rect width="900" height="200" rx="12" fill="url(#bg)"/>
  <!-- rx takes a single radius and cannot round only the bottom corners, so the strip
       is drawn square and clipped to the card outline instead. -->
  <rect x="0" y="185" width="900" height="15" fill="url(#accent)" opacity="0.8" clip-path="url(#card)"/>
  <text x="450" y="75" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="48" font-weight="700" fill="#ffffff" text-anchor="middle">SMC-CFG Ctrl</text>
  <text x="450" y="115" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="20" fill="#a0a0c0" text-anchor="middle">Sliding Mode Control for Classifier-Free Guidance</text>
  <text x="450" y="150" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="15" fill="#707090" text-anchor="middle">ComfyUI Node | Based on CFG-Ctrl (CVPR 2026)</text>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

87
assets/node-wiring.svg Normal file
View File

@@ -0,0 +1,87 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 860 320" role="img" aria-labelledby="wiring-title wiring-desc">
  <!-- Wiring diagram: Load Checkpoint, then SMC-CFG Ctrl, then KSampler, joined by MODEL wires at y=120. -->
  <title id="wiring-title">Node Wiring in ComfyUI</title>
  <desc id="wiring-desc">The MODEL output of Load Checkpoint feeds the model input of the SMC-CFG Ctrl node (smc_cfg_lambda 5.0, smc_cfg_K 0.2, warmup_steps 0), whose MODEL output feeds the model input of KSampler.</desc>
  <defs>
    <linearGradient id="wbg" x1="0%" y1="0%" x2="100%" y2="100%">
      <stop offset="0%" stop-color="#0d1117"/>
      <stop offset="100%" stop-color="#161b22"/>
    </linearGradient>
  </defs>
  <rect width="860" height="320" rx="12" fill="url(#wbg)"/>
  <text x="430" y="32" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#e6edf3" text-anchor="middle">Node Wiring in ComfyUI</text>

  <!-- Node 1: Checkpoint Loader -->
  <rect x="30" y="70" width="180" height="100" rx="8" fill="#1c2333" stroke="#30363d" stroke-width="1.5"/>
  <!-- Title bar: a rounded rect plus a square strip that squares off its bottom corners. -->
  <rect x="30" y="70" width="180" height="28" rx="8" fill="#2d333b"/>
  <rect x="30" y="90" width="180" height="8" fill="#2d333b"/>
  <text x="120" y="89" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" font-weight="600" fill="#e6edf3" text-anchor="middle">Load Checkpoint</text>
  <!-- Output ports; both labels are right-aligned next to their dot on the node's right edge. -->
  <circle cx="210" cy="120" r="6" fill="#79c0ff" stroke="#1c2333" stroke-width="2"/>
  <text x="196" y="123" font-family="monospace" font-size="10" fill="#8b949e" text-anchor="end">MODEL</text>
  <circle cx="210" cy="145" r="6" fill="#d2a8ff" stroke="#1c2333" stroke-width="2"/>
  <text x="196" y="148" font-family="monospace" font-size="10" fill="#8b949e" text-anchor="end">CLIP</text>

  <!-- Wire: Checkpoint MODEL output to SMC-CFG Ctrl model input -->
  <path d="M 216 120 C 260 120, 260 120, 300 120" stroke="#79c0ff" stroke-width="2.5" fill="none"/>

  <!-- Node 2: SMC-CFG Ctrl (highlighted with a red outline) -->
  <rect x="300" y="55" width="220" height="195" rx="8" fill="#1c2333" stroke="#e94560" stroke-width="2"/>
  <rect x="300" y="55" width="220" height="28" rx="8" fill="#3d1a2e"/>
  <rect x="300" y="75" width="220" height="8" fill="#3d1a2e"/>
  <text x="410" y="74" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" font-weight="700" fill="#ff7b72" text-anchor="middle">SMC-CFG Ctrl</text>
  <!-- Input port -->
  <circle cx="300" cy="120" r="6" fill="#79c0ff" stroke="#1c2333" stroke-width="2"/>
  <text x="365" y="108" font-family="monospace" font-size="10" fill="#8b949e" text-anchor="middle">model</text>
  <!-- Parameters -->
  <rect x="315" y="120" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="325" y="135" font-family="monospace" font-size="10" fill="#d2a8ff">smc_cfg_lambda</text>
  <text x="490" y="135" font-family="monospace" font-size="10" fill="#7ee787" text-anchor="end">5.0</text>
  <rect x="315" y="148" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="325" y="163" font-family="monospace" font-size="10" fill="#d2a8ff">smc_cfg_K</text>
  <text x="490" y="163" font-family="monospace" font-size="10" fill="#7ee787" text-anchor="end">0.2</text>
  <rect x="315" y="176" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="325" y="191" font-family="monospace" font-size="10" fill="#d2a8ff">warmup_steps</text>
  <text x="490" y="191" font-family="monospace" font-size="10" fill="#7ee787" text-anchor="end">0</text>
  <!-- Output port; its label shares the y=108 row with the input label so it sits beside
       the port it names rather than below the parameter rows. -->
  <circle cx="520" cy="120" r="6" fill="#79c0ff" stroke="#1c2333" stroke-width="2"/>
  <text x="505" y="108" font-family="monospace" font-size="10" fill="#8b949e" text-anchor="end">MODEL</text>

  <!-- Wire: SMC-CFG Ctrl MODEL output to KSampler model input -->
  <path d="M 526 120 C 570 120, 570 120, 610 120" stroke="#79c0ff" stroke-width="2.5" fill="none"/>

  <!-- Node 3: KSampler -->
  <rect x="610" y="55" width="220" height="220" rx="8" fill="#1c2333" stroke="#30363d" stroke-width="1.5"/>
  <rect x="610" y="55" width="220" height="28" rx="8" fill="#2d333b"/>
  <rect x="610" y="75" width="220" height="8" fill="#2d333b"/>
  <text x="720" y="74" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" font-weight="600" fill="#e6edf3" text-anchor="middle">KSampler</text>
  <!-- Input port -->
  <circle cx="610" cy="120" r="6" fill="#79c0ff" stroke="#1c2333" stroke-width="2"/>
  <text x="670" y="108" font-family="monospace" font-size="10" fill="#8b949e" text-anchor="middle">model</text>
  <!-- KSampler params; the cfg row is tinted orange to draw attention to it. -->
  <rect x="625" y="120" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="635" y="135" font-family="monospace" font-size="10" fill="#8b949e">steps</text>
  <text x="800" y="135" font-family="monospace" font-size="10" fill="#7ee787" text-anchor="end">20</text>
  <rect x="625" y="148" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="635" y="163" font-family="monospace" font-size="10" fill="#f0883e">cfg</text>
  <text x="800" y="163" font-family="monospace" font-size="10" fill="#f0883e" text-anchor="end">7.5</text>
  <rect x="625" y="176" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="635" y="191" font-family="monospace" font-size="10" fill="#8b949e">sampler</text>
  <text x="800" y="191" font-family="monospace" font-size="10" fill="#7ee787" text-anchor="end">euler</text>
  <rect x="625" y="204" width="190" height="22" rx="4" fill="#21262d"/>
  <text x="635" y="219" font-family="monospace" font-size="10" fill="#8b949e">scheduler</text>
  <text x="800" y="219" font-family="monospace" font-size="10" fill="#7ee787" text-anchor="end">normal</text>
  <!-- Positive/negative conditioning input ports; their wires come from nodes outside this diagram and are not drawn. -->
  <circle cx="610" cy="145" r="6" fill="#d2a8ff" stroke="#1c2333" stroke-width="2"/>
  <circle cx="610" cy="170" r="6" fill="#d2a8ff" stroke="#1c2333" stroke-width="2"/>

  <!-- Legend: uses the same grey as the port labels so the instruction stays readable on the dark background. -->
  <text x="430" y="300" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e" text-anchor="middle">Insert the SMC-CFG Ctrl node between your model loader and sampler. Use higher CFG scales than usual.</text>
</svg>

After

Width:  |  Height:  |  Size: 5.5 KiB

83
assets/parameters.svg Normal file
View File

@@ -0,0 +1,83 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 860 440" role="img" aria-labelledby="params-title params-desc">
  <!-- Parameter guide: a 2x2 grid of cards (lambda, K, warmup_steps, recommended CFG scales). -->
  <title id="params-title">Parameter Guide</title>
  <desc id="params-desc">Four cards: smc_cfg_lambda (default 5.0), smc_cfg_K (default 0.2), warmup_steps (default 0), and a table of recommended CFG scales per model.</desc>
  <defs>
    <linearGradient id="pbg" x1="0%" y1="0%" x2="100%" y2="100%">
      <stop offset="0%" stop-color="#0d1117"/>
      <stop offset="100%" stop-color="#161b22"/>
    </linearGradient>
  </defs>
  <rect width="860" height="440" rx="12" fill="url(#pbg)"/>
  <text x="430" y="35" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="20" font-weight="700" fill="#e6edf3" text-anchor="middle">Parameter Guide</text>

  <!-- Lambda -->
  <rect x="30" y="55" width="390" height="165" rx="10" fill="#1c2333" stroke="#d2a8ff" stroke-width="1.5"/>
  <text x="50" y="82" font-family="monospace" font-size="15" font-weight="700" fill="#d2a8ff">smc_cfg_lambda</text>
  <text x="380" y="82" font-family="monospace" font-size="13" fill="#7ee787" text-anchor="end">default: 5.0</text>
  <text x="50" y="108" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">Sliding surface coefficient. Controls how aggressively the</text>
  <text x="50" y="124" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">controller corrects accumulated guidance errors.</text>
  <!-- Lambda scale: grey track with green / orange / red segments laid end to end.
       rx accepts one radius only, so the segments that need a single rounded end are
       drawn as paths: the green one rounds its left end, the red one its right end. -->
  <rect x="50" y="145" width="350" height="8" rx="4" fill="#21262d"/>
  <path d="M 85 145 H 54 A 4 4 0 0 0 54 153 H 85 Z" fill="#56d364"/>
  <rect x="85" y="145" width="140" height="8" fill="#f0883e"/>
  <path d="M 225 145 H 396 A 4 4 0 0 1 396 153 H 225 Z" fill="#f85149"/>
  <text x="50" y="170" font-family="monospace" font-size="10" fill="#56d364">0</text>
  <text x="155" y="170" font-family="monospace" font-size="10" fill="#f0883e" text-anchor="middle">5.0 (sweet spot)</text>
  <text x="400" y="170" font-family="monospace" font-size="10" fill="#f85149" text-anchor="end">50</text>
  <text x="50" y="195" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#7ee787">Low: gentle, preserves original CFG behavior</text>
  <text x="50" y="210" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#f85149">High: aggressive correction, may over-constrain</text>

  <!-- K -->
  <rect x="440" y="55" width="390" height="165" rx="10" fill="#1c2333" stroke="#79c0ff" stroke-width="1.5"/>
  <text x="460" y="82" font-family="monospace" font-size="15" font-weight="700" fill="#79c0ff">smc_cfg_K</text>
  <text x="790" y="82" font-family="monospace" font-size="13" fill="#7ee787" text-anchor="end">default: 0.2</text>
  <text x="460" y="108" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">Switching gain. Bounds the per-element correction</text>
  <text x="460" y="124" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">magnitude to [-K, +K]. Think of it as a safety clamp.</text>
  <!-- K scale: same construction as the lambda scale. -->
  <rect x="460" y="145" width="350" height="8" rx="4" fill="#21262d"/>
  <path d="M 475 145 H 464 A 4 4 0 0 0 464 153 H 475 Z" fill="#56d364"/>
  <rect x="475" y="145" width="90" height="8" fill="#f0883e"/>
  <path d="M 565 145 H 806 A 4 4 0 0 1 806 153 H 565 Z" fill="#f85149"/>
  <text x="460" y="170" font-family="monospace" font-size="10" fill="#56d364">0</text>
  <text x="505" y="170" font-family="monospace" font-size="10" fill="#f0883e" text-anchor="middle">0.2</text>
  <text x="810" y="170" font-family="monospace" font-size="10" fill="#f85149" text-anchor="end">5.0</text>
  <text x="460" y="195" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#7ee787">Low: subtle correction, smooth results</text>
  <text x="460" y="210" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#f85149">High: strong correction, may cause chattering</text>

  <!-- Warmup -->
  <rect x="30" y="240" width="390" height="175" rx="10" fill="#1c2333" stroke="#f0883e" stroke-width="1.5"/>
  <text x="50" y="267" font-family="monospace" font-size="15" font-weight="700" fill="#f0883e">warmup_steps</text>
  <text x="380" y="267" font-family="monospace" font-size="13" fill="#7ee787" text-anchor="end">default: 0</text>
  <text x="50" y="293" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">Initial denoising steps with NO guidance at all.</text>
  <text x="50" y="309" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">Pure conditional prediction. Lets the model establish</text>
  <text x="50" y="325" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="12" fill="#8b949e">rough structure before guidance corrects direction.</text>
  <!-- Timeline: warmup segment (x=50 to 120) followed by the SMC-CFG segment (x=120 to 400).
       The second segment is a path so that only its right-hand corners are rounded. -->
  <rect x="50" y="350" width="350" height="20" rx="4" fill="#21262d"/>
  <rect x="50" y="350" width="70" height="20" rx="4" fill="#2d333b" stroke="#f0883e" stroke-width="1"/>
  <path d="M 120 350 H 396 A 4 4 0 0 1 400 354 V 366 A 4 4 0 0 1 396 370 H 120 Z" fill="#0d2818" stroke="#238636" stroke-width="1"/>
  <text x="85" y="364" font-family="monospace" font-size="9" fill="#f0883e" text-anchor="middle">warmup</text>
  <text x="260" y="364" font-family="monospace" font-size="9" fill="#7ee787" text-anchor="middle">SMC-CFG active</text>
  <text x="50" y="393" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#8b949e">Typical: 0-5 steps. Try 2-3 for more diverse compositions.</text>

  <!-- CFG Scale recommendations -->
  <rect x="440" y="240" width="390" height="175" rx="10" fill="#1c2333" stroke="#7ee787" stroke-width="1.5"/>
  <text x="460" y="267" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="15" font-weight="700" fill="#7ee787">Recommended CFG Scales</text>
  <!-- Two-column table. SVG collapses runs of spaces inside text, so each cell is an
       explicitly positioned tspan: model names start at x=460, scales at x=650. -->
  <text y="295" font-family="monospace" font-size="12" fill="#c9d1d9"><tspan x="460">Model</tspan><tspan x="650">CFG Scale</tspan></text>
  <rect x="460" y="302" width="350" height="1" fill="#30363d"/>
  <text y="322" font-family="monospace" font-size="12" fill="#79c0ff"><tspan x="460">SD 1.5 / SDXL</tspan><tspan x="650">7.0 - 12.0</tspan></text>
  <text y="344" font-family="monospace" font-size="12" fill="#79c0ff"><tspan x="460">SD3 / SD3.5</tspan><tspan x="650">7.5</tspan></text>
  <text y="366" font-family="monospace" font-size="12" fill="#79c0ff"><tspan x="460">FLUX.1-dev</tspan><tspan x="650">2.0 - 3.0</tspan></text>
  <text y="388" font-family="monospace" font-size="12" fill="#79c0ff"><tspan x="460">Wan 2.1/2.2</tspan><tspan x="650">5.0</tspan></text>
  <text x="460" y="408" font-family="Segoe UI, Helvetica, Arial, sans-serif" font-size="11" fill="#8b949e">SMC-CFG lets you push CFG higher than usual</text>
</svg>

After

Width:  |  Height:  |  Size: 6.2 KiB