Bundle sparse_sage Triton kernel for block-sparse attention
Without sparse attention, the model falls back to full (dense) attention, which attends to distant, irrelevant information and causes ghosting artifacts. The FlashVSR paper explicitly requires block-sparse attention. The kernel is vendored from the SageAttention team (Apache 2.0) and is pure Triton (no CUDA C++). Import chain: local sparse_sage → external sageattn.core → SDPA fallback.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -31,15 +31,24 @@ except Exception:
|
||||
SAGE_ATTN_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from sageattn.core import sparse_sageattn
|
||||
from .sparse_sage.core import sparse_sageattn
|
||||
assert callable(sparse_sageattn)
|
||||
SPARSE_SAGE_AVAILABLE = True
|
||||
except Exception:
|
||||
SPARSE_SAGE_AVAILABLE = False
|
||||
sparse_sageattn = None
|
||||
try:
|
||||
from sageattn.core import sparse_sageattn
|
||||
assert callable(sparse_sageattn)
|
||||
SPARSE_SAGE_AVAILABLE = True
|
||||
except Exception:
|
||||
SPARSE_SAGE_AVAILABLE = False
|
||||
sparse_sageattn = None
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
print(f"[FlashVSR] Attention backends: sparse_sage={SPARSE_SAGE_AVAILABLE}, "
|
||||
f"flash_attn_3={FLASH_ATTN_3_AVAILABLE}, flash_attn_2={FLASH_ATTN_2_AVAILABLE}, "
|
||||
f"sage_attn={SAGE_ATTN_AVAILABLE}")
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Local / window masks
|
||||
|
||||
Reference in New Issue
Block a user