ComfyUI-SelVA/experiments/ti_sweep_1.json

{
  "name": "ti_sweep_1",
  "description": "First TI sweep: inject position, token count, learning rate, and warm init. n4_baseline already completed (suffix, loss barely moved — model likely ignores last-K positions). Priority: prefix injection group.",
  "data_dir": "/media/unraid/davinci/Selva/BJ/features",
  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/ti_sweep_1",
  "base": {
    "steps": 3000,
    "batch_size": 16,
    "warmup_steps": 100,
    "save_every": 1000,
    "seed": 42,
    "init_text": "",
    "lr": 1e-3,
    "n_tokens": 4,
    "inject_mode": "suffix"
  },
  "experiments": [

    {
      "id": "n4_baseline",
      "group": "suffix_token_count",
      "description": "4 tokens, suffix, lr=1e-3, random init. COMPLETED — loss 1.025→0.965, nearly flat. Token norm grew linearly to 3.2 with no plateau. Model appears to ignore last-K positions."
    },
    {
      "id": "n8",
      "group": "suffix_token_count",
      "description": "8 tokens, suffix, lr=1e-3. More capacity — does it do better than n4_baseline?",
      "n_tokens": 8
    },

    {
      "id": "n4_prefix",
      "group": "prefix_inject",
      "description": "4 tokens at positions 1:5 (after BOS). Prefix positions carry the highest attention weight in CLIP — should produce much stronger loss signal than suffix.",
      "inject_mode": "prefix"
    },
    {
      "id": "n8_prefix",
      "group": "prefix_inject",
      "description": "8 tokens at prefix positions. More capacity + high-attention positions.",
      "n_tokens": 8,
      "inject_mode": "prefix"
    },
    {
      "id": "n4_prefix_warm",
      "group": "prefix_inject",
      "description": "4 tokens, prefix, warm-started from 'mechanical impact sound design'. Best of both: semantically meaningful start + strong gradient signal.",
      "inject_mode": "prefix",
      "init_text": "mechanical impact sound design"
    },

    {
      "id": "lr_5e4",
      "group": "learning_rate",
      "description": "4 tokens, suffix, lr=5e-4. Slower convergence — mainly a baseline comparison for the prefix group.",
      "lr": 5e-4
    },
    {
      "id": "lr_2e3",
      "group": "learning_rate",
      "description": "4 tokens, suffix, lr=2e-3. Faster early movement — does token norm plateau earlier?",
      "lr": 2e-3
    },

    {
      "id": "n4_warm",
      "group": "warm_init",
      "description": "4 tokens, suffix, warm-started from 'mechanical impact sound design'.",
      "init_text": "mechanical impact sound design"
    }

  ]
}