diff --git a/cfg/segment/swin_t-fpn.yaml b/cfg/segment/swin_t-fpn.yaml new file mode 100644 index 0000000..d902f4c --- /dev/null +++ b/cfg/segment/swin_t-fpn.yaml @@ -0,0 +1,39 @@ +# Metadata +author: Y-T-G +task: segment +keywords: [transformer, swin, fpn] +description: Swin-T pretrained backbone with YOLO11 FPN neck and Segment head. +flops: 125.8 +parameters: 42152365 +min_version: 8.3.64 + +# Model +nc: 1 +strides: [8, 16, 32] +scales: + # [depth, width, max_channels] + default: [1.00, 1.00, 1024] +backbone: + # [from, repeats, module, args] + - [-1, 1, TorchVision, [768, swin_t, DEFAULT, True, 5, True]] # 0 + - [0, 1, Index, [192, 4]] # (1, 80, 80, 192) - 1 + - [-1, 1, torchvision.ops.Permute, [[0, 3, 1, 2]]] # (1, 192, 80, 80) - 2 + - [0, 1, Index, [384, 6]] # (1, 40, 40, 384) - 3 + - [-1, 1, torchvision.ops.Permute, [[0, 3, 1, 2]]] # (1, 384, 40, 40) - 4 + - [0, 1, Index, [768, 8]] # (1, 20, 20, 768) - 5 + - [-1, 1, torchvision.ops.Permute, [[0, 3, 1, 2]]] # (1, 768, 20, 20) - 6 + - [-1, 1, SPPF, [768, 5]] # (1, 768, 20, 20) - 7 +head: + - [-1, 1, nn.Upsample, [None, 2, nearest]] # (1, 768, 40, 40) - 8 + - [[-1, 4], 1, Concat, [1]] # (1, 1152, 40, 40) - 9 + - [-1, 2, C3k2, [384, False]] # (1, 384, 40, 40) - 10 + - [-1, 1, nn.Upsample, [None, 2, nearest]] # (1, 384, 80, 80) - 11 + - [[-1, 2], 1, Concat, [1]] # (1, 576, 80, 80) - 12 + - [-1, 2, C3k2, [192, False]] # (1, 192, 80, 80) - 13 + - [-1, 1, Conv, [192, 3, 2]] # (1, 192, 40, 40) - 14 + - [[-1, -5], 1, Concat, [1]] # (1, 576, 40, 40) - 15 + - [-1, 2, C3k2, [384, False]] # (1, 384, 40, 40) - 16 + - [-1, 1, Conv, [384, 3, 2]] # (1, 384, 20, 20) - 17 + - [[-1, -11], 1, Concat, [1]] # (1, 1152, 20, 20) - 18 + - [-1, 2, C3k2, [768, True]] # (1, 768, 20, 20) - 19 + - [[-7, -4, -1], 1, Segment, [nc, 32, 256]] # 20