{ "name": "ti_sweep_1", "description": "First TI sweep: inject position, token count, learning rate, and warm init. n4_baseline already completed (suffix, loss barely moved — model likely ignores last-K positions). Priority: prefix injection group.", "data_dir": "/media/unraid/davinci/Selva/BJ/features", "output_root": "/media/unraid/davinci/Selva/BJ/experiment/ti_sweep_1", "base": { "steps": 3000, "batch_size": 16, "warmup_steps": 100, "save_every": 1000, "seed": 42, "init_text": "", "lr": 1e-3, "n_tokens": 4, "inject_mode": "suffix" }, "experiments": [ { "id": "n4_baseline", "group": "suffix_token_count", "description": "4 tokens, suffix, lr=1e-3, random init. COMPLETED — loss 1.025→0.965, nearly flat. Token norm grew linearly to 3.2 with no plateau. Model appears to ignore last-K positions." }, { "id": "n8", "group": "suffix_token_count", "description": "8 tokens, suffix, lr=1e-3. More capacity — does it do better than n4_baseline?", "n_tokens": 8 }, { "id": "n4_prefix", "group": "prefix_inject", "description": "4 tokens at positions 1:5 (after BOS). Prefix positions carry the highest attention weight in CLIP — should produce much stronger loss signal than suffix.", "inject_mode": "prefix" }, { "id": "n8_prefix", "group": "prefix_inject", "description": "8 tokens at prefix positions. More capacity + high-attention positions.", "n_tokens": 8, "inject_mode": "prefix" }, { "id": "n4_prefix_warm", "group": "prefix_inject", "description": "4 tokens, prefix, warm-started from 'mechanical impact sound design'. Best of both: semantically meaningful start + strong gradient signal.", "inject_mode": "prefix", "init_text": "mechanical impact sound design" }, { "id": "lr_5e4", "group": "learning_rate", "description": "4 tokens, suffix, lr=5e-4. Slower convergence — mainly a baseline comparison for the prefix group.", "lr": 5e-4 }, { "id": "lr_2e3", "group": "learning_rate", "description": "4 tokens, suffix, lr=2e-3. Faster early movement — does token norm plateau earlier?", "lr": 2e-3 }, { "id": "n4_warm", "group": "warm_init", "description": "4 tokens, suffix, warm-started from 'mechanical impact sound design'.", "init_text": "mechanical impact sound design" } ] }