from typing import List, Union

import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmdet.utils import ConfigType, OptMultiConfig
from mmengine.model import BaseModule
from mmengine.registry import MODELS

from ..layers import CSPLayerWithTwoConv
from ..utils import make_divisible, make_round


@MODELS.register_module()
class YOLOv8PAFPN(BaseModule):
- """Path Aggregation Network used in YOLOv8.
- Args:
- in_channels (List[int]): Number of input channels per scale.
- out_channels (int): Number of output channels (used at each scale)
- deepen_factor (float): Depth multiplier, multiply number of
- blocks in CSP layer by this amount. Defaults to 1.0.
- widen_factor (float): Width multiplier, multiply number of
- channels in each layer by this amount. Defaults to 1.0.
- num_csp_blocks (int): Number of bottlenecks in CSPLayer. Defaults to 1.
- freeze_all(bool): Whether to freeze the model
- norm_cfg (dict): Config dict for normalization layer.
- Defaults to dict(type='BN', momentum=0.03, eps=0.001).
- act_cfg (dict): Config dict for activation layer.
- Defaults to dict(type='SiLU', inplace=True).
- init_cfg (dict or list[dict], optional): Initialization config dict.
- Defaults to None.
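
    Example (an illustrative sketch; the channel and scale settings below
    are assumptions for a 640x640 input, not required values):
        >>> import torch
        >>> neck = YOLOv8PAFPN(in_channels=[256, 512, 1024],
        ...                    out_channels=[256, 512, 1024])
        >>> feats = [torch.rand(1, c, 80 // 2**i, 80 // 2**i)
        ...          for i, c in enumerate([256, 512, 1024])]
        >>> [tuple(o.shape) for o in neck(feats)]
        [(1, 256, 80, 80), (1, 512, 40, 40), (1, 1024, 20, 20)]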
- """

    def __init__(self,
                 in_channels: List[int],
                 out_channels: Union[List[int], int],
                 deepen_factor: float = 1.0,
                 widen_factor: float = 1.0,
                 num_csp_blocks: int = 3,
                 freeze_all: bool = False,
                 upsample_feats_cat_first: bool = True,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='SiLU', inplace=True),
                 init_cfg: OptMultiConfig = None):
        super().__init__(init_cfg)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.deepen_factor = deepen_factor
        self.widen_factor = widen_factor
        self.upsample_feats_cat_first = upsample_feats_cat_first
        self.freeze_all = freeze_all
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.num_csp_blocks = num_csp_blocks

        # build top-down blocks
        self.upsample_layers = nn.ModuleList()
        self.top_down_layers = nn.ModuleList()
        for idx in range(len(in_channels) - 1, 0, -1):
            self.upsample_layers.append(self.build_upsample_layer(idx))
            self.top_down_layers.append(self.build_top_down_layer(idx))

        # build bottom-up blocks
        self.downsample_layers = nn.ModuleList()
        self.bottom_up_layers = nn.ModuleList()
        for idx in range(len(in_channels) - 1):
            self.downsample_layers.append(self.build_downsample_layer(idx))
            self.bottom_up_layers.append(self.build_bottom_up_layer(idx))
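
    # NOTE: ``freeze_all`` is stored in ``__init__`` but nothing in this
    # class acts on it. The two methods below are a minimal sketch of the
    # freezing hook, modeled on the convention used by sibling PAFPN necks
    # in mmyolo; treat them as an assumption, not the verbatim upstream code.
    def _freeze_all(self):
        """Freeze all parameters and keep normalization layers in eval mode."""
        # local import to keep the module header above unchanged
        from torch.nn.modules.batchnorm import _BatchNorm
        for m in self.modules():
            if isinstance(m, _BatchNorm):
                m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def train(self, mode: bool = True):
        """Keep frozen layers frozen when switching to training mode."""
        super().train(mode)
        if self.freeze_all:
            self._freeze_all()
        return self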

    def build_upsample_layer(self, *args, **kwargs) -> nn.Module:
        """build upsample layer."""
        return nn.Upsample(scale_factor=2, mode='nearest')

    def build_top_down_layer(self, idx: int) -> nn.Module:
        """build top down layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The top down layer.
        """
        return CSPLayerWithTwoConv(
            make_divisible((self.in_channels[idx - 1] + self.in_channels[idx]),
                           self.widen_factor),
            make_divisible(self.out_channels[idx - 1], self.widen_factor),
            num_blocks=make_round(self.num_csp_blocks, self.deepen_factor),
            add_identity=False,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def build_bottom_up_layer(self, idx: int) -> nn.Module:
        """build bottom up layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The bottom up layer.
        """
        return CSPLayerWithTwoConv(
            make_divisible(
                (self.out_channels[idx] + self.out_channels[idx + 1]),
                self.widen_factor),
            make_divisible(self.out_channels[idx + 1], self.widen_factor),
            num_blocks=make_round(self.num_csp_blocks, self.deepen_factor),
            add_identity=False,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def build_downsample_layer(self, idx: int) -> nn.Module:
        """build downsample layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The downsample layer.
        """
        return ConvModule(
            make_divisible(self.in_channels[idx], self.widen_factor),
            make_divisible(self.in_channels[idx], self.widen_factor),
            kernel_size=3,
            stride=2,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def init_weights(self):
        """Initialize the parameters."""
        if self.init_cfg is None:
            for m in self.modules():
                if isinstance(m, torch.nn.Conv2d):
                    # Reset Conv2d parameters to stay consistent with the
                    # official YOLOv8 initialization.
                    m.reset_parameters()
        else:
            super().init_weights()

    def forward(self, inputs: List[torch.Tensor]) -> tuple:
        """Forward function."""
        assert len(inputs) == len(self.in_channels)

        # top-down path
        inner_outs = [inputs[-1]]
        for idx in range(len(self.in_channels) - 1, 0, -1):
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]
            upsample_feat = self.upsample_layers[len(self.in_channels) - 1 -
                                                 idx](feat_high)
            if self.upsample_feats_cat_first:
                top_down_layer_inputs = torch.cat([upsample_feat, feat_low], 1)
            else:
                top_down_layer_inputs = torch.cat([feat_low, upsample_feat], 1)
            inner_out = self.top_down_layers[len(self.in_channels) - 1 - idx](
                top_down_layer_inputs)
            inner_outs.insert(0, inner_out)

        # bottom-up path
        outs = [inner_outs[0]]
        for idx in range(len(self.in_channels) - 1):
            feat_low = outs[-1]
            feat_high = inner_outs[idx + 1]
            downsample_feat = self.downsample_layers[idx](feat_low)
            out = self.bottom_up_layers[idx](
                torch.cat([downsample_feat, feat_high], 1))
            outs.append(out)

        return tuple(outs)
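

# A minimal smoke-test sketch (assumption: run as a module from a checkout
# where the relative imports above resolve, e.g. ``python -m <pkg>.<module>``);
# the factors and channel widths below are illustrative, not mandated.
if __name__ == '__main__':
    neck = YOLOv8PAFPN(
        in_channels=[256, 512, 1024],
        out_channels=[256, 512, 1024],
        deepen_factor=0.33,
        widen_factor=0.5)
    feats = [
        torch.rand(1, make_divisible(c, 0.5), 80 // 2**i, 80 // 2**i)
        for i, c in enumerate([256, 512, 1024])
    ]
    for out in neck(feats):
        # expect (1, 128, 80, 80), (1, 256, 40, 40), (1, 512, 20, 20)
        print(tuple(out.shape))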