Add SGM-VFI (CVPR 2024) frame interpolation support
SGM-VFI combines local flow estimation with sparse global matching (GMFlow) to handle large motion and occlusion-heavy scenes. Adds 3 new nodes: Load SGM-VFI Model, SGM-VFI Interpolate, SGM-VFI Segment Interpolate. Architecture files vendored from MCG-NJU/SGM-VFI with device-awareness fixes (no hardcoded .cuda()), relative imports, and debug code removed. README updated with model comparison table. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
116
sgm_vfi_arch/backbone.py
Normal file
116
sgm_vfi_arch/backbone.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import torch.nn as nn
|
||||
|
||||
from .trident_conv import MultiScaleTridentConv
|
||||
|
||||
|
||||
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv -> norm -> ReLU stages plus a skip path.

    The skip path is the identity when ``stride == 1`` and
    ``in_planes == planes``; otherwise the input is projected by a 1x1
    convolution (with the same ``stride``) followed by a normalization layer
    so that it can be added to the main branch.

    Args:
        in_planes: Number of input channels of the first convolution.
        planes: Number of output channels of both 3x3 convolutions.
        norm_layer: Callable that receives a channel count and returns a
            normalization module.
        stride: Stride of the first 3x3 convolution and of the 1x1 skip
            projection.
        dilation: Dilation of both 3x3 convolutions; also used as their
            padding.
    """

    def __init__(self, in_planes, planes, norm_layer=nn.InstanceNorm2d, stride=1, dilation=1,
                 ):
        super().__init__()

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               dilation=dilation, padding=dilation, stride=stride, bias=False)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               dilation=dilation, padding=dilation, bias=False)
        self.relu = nn.ReLU(inplace=True)

        self.norm1 = norm_layer(planes)
        self.norm2 = norm_layer(planes)

        if stride == 1 and in_planes == planes:
            # Input and main branch already agree in channels and stride:
            # the skip path is the identity.
            self.downsample = None
        else:
            # norm3 stays registered both as ``self.norm3`` and as
            # ``self.downsample[1]`` so parameter names in ``state_dict``
            # are the same as before this rewrite.
            self.norm3 = norm_layer(planes)
            self.downsample = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride),
                self.norm3,
            )

    def forward(self, x):
        """Return ``relu(skip(x) + main(x))``."""
        y = self.relu(self.norm1(self.conv1(x)))
        y = self.relu(self.norm2(self.conv2(y)))

        if self.downsample is not None:
            x = self.downsample(x)

        return self.relu(x + y)
|
||||
|
||||
|
||||
class CNNEncoder(nn.Module):
    """Convolutional feature encoder built from a stem and three residual stages.

    The stem (7x7 conv, stride 2) and ``layer2`` (stride 2) each halve the
    spatial resolution. ``layer3`` halves it again only when
    ``num_output_scales == 1``, so the features passed to the final 1x1
    projection are at 1/8 (single-scale) or 1/4 (otherwise) of the input
    resolution.

    Args:
        output_dim: Number of channels produced by the final 1x1 convolution.
        norm_layer: Callable that receives a channel count and returns a
            normalization module; used by the stem and every residual block.
        num_output_scales: Stored as ``self.num_branch``. Values greater
            than 1 add a ``MultiScaleTridentConv`` head with that many
            branches; only 2, 3 and 4 are supported in that case.
        **kwargs: Accepted and ignored.

    Raises:
        ValueError: If ``num_output_scales`` is greater than 1 but is not
            2, 3 or 4.
    """

    def __init__(self, output_dim=128,
                 norm_layer=nn.InstanceNorm2d,
                 num_output_scales=1,
                 **kwargs,
                 ):
        super().__init__()
        self.num_branch = num_output_scales

        feature_dims = [64, 96, 128]

        self.conv1 = nn.Conv2d(3, feature_dims[0], kernel_size=7, stride=2, padding=3, bias=False)  # 1/2
        self.norm1 = norm_layer(feature_dims[0])
        self.relu1 = nn.ReLU(inplace=True)

        # _make_layer reads and then updates self.in_planes, so the stages
        # must be built in this order.
        self.in_planes = feature_dims[0]
        self.layer1 = self._make_layer(feature_dims[0], stride=1, norm_layer=norm_layer)  # 1/2
        self.layer2 = self._make_layer(feature_dims[1], stride=2, norm_layer=norm_layer)  # 1/4

        # highest resolution 1/4 or 1/8
        stride = 2 if num_output_scales == 1 else 1
        self.layer3 = self._make_layer(feature_dims[2], stride=stride, norm_layer=norm_layer,
                                       )  # 1/4 or 1/8

        self.conv2 = nn.Conv2d(feature_dims[2], output_dim, 1, 1, 0)

        if self.num_branch > 1:
            # One stride per branch, doubling from branch to branch.
            branch_strides = {
                2: (1, 2),
                3: (1, 2, 4),
                4: (1, 2, 4, 8),
            }
            strides = branch_strides.get(self.num_branch)
            if strides is None:
                raise ValueError(
                    f"unsupported num_output_scales={self.num_branch!r}: "
                    "multi-scale output requires 2, 3 or 4"
                )

            self.trident_conv = MultiScaleTridentConv(output_dim, output_dim,
                                                      kernel_size=3,
                                                      strides=strides,
                                                      paddings=1,
                                                      num_branch=self.num_branch,
                                                      )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
                # weight / bias can be None (checked here), in which case
                # there is nothing to initialise.
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def _make_layer(self, dim, stride=1, dilation=1, norm_layer=nn.InstanceNorm2d):
        """Build one stage of two residual blocks and advance ``self.in_planes``.

        The first block maps ``self.in_planes`` channels to ``dim`` using
        ``stride``; the second keeps ``dim`` channels with stride 1.
        """
        layer1 = ResidualBlock(self.in_planes, dim, norm_layer=norm_layer, stride=stride, dilation=dilation)
        layer2 = ResidualBlock(dim, dim, norm_layer=norm_layer, stride=1, dilation=dilation)

        self.in_planes = dim
        return nn.Sequential(layer1, layer2)

    def forward(self, x):
        """Encode ``x`` and return the resulting features.

        Returns:
            A one-element list ``[features]`` when ``self.num_branch <= 1``;
            otherwise whatever ``self.trident_conv`` returns for
            ``num_branch`` references to the same feature tensor
            (ordered high to low resolution, per the original comment;
            not verifiable from this file).
        """
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu1(x)

        x = self.layer1(x)  # 1/2
        x = self.layer2(x)  # 1/4
        x = self.layer3(x)  # 1/8 or 1/4

        x = self.conv2(x)

        if self.num_branch > 1:
            out = self.trident_conv([x] * self.num_branch)  # high to low res
        else:
            out = [x]

        return out
|
||||
Reference in New Issue
Block a user