cascade_rcnn_r50_fpn.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. from mmcv.ops import RoIAlign, nms
  3. from torch.nn import BatchNorm2d
  4. from mmdet.models.backbones.resnet import ResNet
  5. from mmdet.models.data_preprocessors.data_preprocessor import \
  6. DetDataPreprocessor
  7. from mmdet.models.dense_heads.rpn_head import RPNHead
  8. from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
  9. from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
  10. from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
  11. from mmdet.models.necks.fpn import FPN
  12. from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
  13. Shared2FCBBoxHead
  14. from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
  15. from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
  16. SingleRoIExtractor
  17. from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
  18. from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
  19. DeltaXYWHBBoxCoder
  20. from mmdet.models.task_modules.prior_generators.anchor_generator import \
  21. AnchorGenerator
  22. from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
  23. # model settings
  24. model = dict(
  25. type=CascadeRCNN,
  26. data_preprocessor=dict(
  27. type=DetDataPreprocessor,
  28. mean=[123.675, 116.28, 103.53],
  29. std=[58.395, 57.12, 57.375],
  30. bgr_to_rgb=True,
  31. pad_size_divisor=32),
  32. backbone=dict(
  33. type=ResNet,
  34. depth=50,
  35. num_stages=4,
  36. out_indices=(0, 1, 2, 3),
  37. frozen_stages=1,
  38. norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
  39. norm_eval=True,
  40. style='pytorch',
  41. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  42. neck=dict(
  43. type=FPN,
  44. in_channels=[256, 512, 1024, 2048],
  45. out_channels=256,
  46. num_outs=5),
  47. rpn_head=dict(
  48. type=RPNHead,
  49. in_channels=256,
  50. feat_channels=256,
  51. anchor_generator=dict(
  52. type=AnchorGenerator,
  53. scales=[8],
  54. ratios=[0.5, 1.0, 2.0],
  55. strides=[4, 8, 16, 32, 64]),
  56. bbox_coder=dict(
  57. type=DeltaXYWHBBoxCoder,
  58. target_means=[.0, .0, .0, .0],
  59. target_stds=[1.0, 1.0, 1.0, 1.0]),
  60. loss_cls=dict(
  61. type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
  62. loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
  63. roi_head=dict(
  64. type=CascadeRoIHead,
  65. num_stages=3,
  66. stage_loss_weights=[1, 0.5, 0.25],
  67. bbox_roi_extractor=dict(
  68. type=SingleRoIExtractor,
  69. roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
  70. out_channels=256,
  71. featmap_strides=[4, 8, 16, 32]),
  72. bbox_head=[
  73. dict(
  74. type=Shared2FCBBoxHead,
  75. in_channels=256,
  76. fc_out_channels=1024,
  77. roi_feat_size=7,
  78. num_classes=80,
  79. bbox_coder=dict(
  80. type=DeltaXYWHBBoxCoder,
  81. target_means=[0., 0., 0., 0.],
  82. target_stds=[0.1, 0.1, 0.2, 0.2]),
  83. reg_class_agnostic=True,
  84. loss_cls=dict(
  85. type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
  86. loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
  87. dict(
  88. type=Shared2FCBBoxHead,
  89. in_channels=256,
  90. fc_out_channels=1024,
  91. roi_feat_size=7,
  92. num_classes=80,
  93. bbox_coder=dict(
  94. type=DeltaXYWHBBoxCoder,
  95. target_means=[0., 0., 0., 0.],
  96. target_stds=[0.05, 0.05, 0.1, 0.1]),
  97. reg_class_agnostic=True,
  98. loss_cls=dict(
  99. type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
  100. loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
  101. dict(
  102. type=Shared2FCBBoxHead,
  103. in_channels=256,
  104. fc_out_channels=1024,
  105. roi_feat_size=7,
  106. num_classes=80,
  107. bbox_coder=dict(
  108. type=DeltaXYWHBBoxCoder,
  109. target_means=[0., 0., 0., 0.],
  110. target_stds=[0.033, 0.033, 0.067, 0.067]),
  111. reg_class_agnostic=True,
  112. loss_cls=dict(
  113. type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
  114. loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
  115. ]),
  116. # model training and testing settings
  117. train_cfg=dict(
  118. rpn=dict(
  119. assigner=dict(
  120. type=MaxIoUAssigner,
  121. pos_iou_thr=0.7,
  122. neg_iou_thr=0.3,
  123. min_pos_iou=0.3,
  124. match_low_quality=True,
  125. ignore_iof_thr=-1),
  126. sampler=dict(
  127. type=RandomSampler,
  128. num=256,
  129. pos_fraction=0.5,
  130. neg_pos_ub=-1,
  131. add_gt_as_proposals=False),
  132. allowed_border=0,
  133. pos_weight=-1,
  134. debug=False),
  135. rpn_proposal=dict(
  136. nms_pre=2000,
  137. max_per_img=2000,
  138. nms=dict(type=nms, iou_threshold=0.7),
  139. min_bbox_size=0),
  140. rcnn=[
  141. dict(
  142. assigner=dict(
  143. type=MaxIoUAssigner,
  144. pos_iou_thr=0.5,
  145. neg_iou_thr=0.5,
  146. min_pos_iou=0.5,
  147. match_low_quality=False,
  148. ignore_iof_thr=-1),
  149. sampler=dict(
  150. type=RandomSampler,
  151. num=512,
  152. pos_fraction=0.25,
  153. neg_pos_ub=-1,
  154. add_gt_as_proposals=True),
  155. pos_weight=-1,
  156. debug=False),
  157. dict(
  158. assigner=dict(
  159. type=MaxIoUAssigner,
  160. pos_iou_thr=0.6,
  161. neg_iou_thr=0.6,
  162. min_pos_iou=0.6,
  163. match_low_quality=False,
  164. ignore_iof_thr=-1),
  165. sampler=dict(
  166. type=RandomSampler,
  167. num=512,
  168. pos_fraction=0.25,
  169. neg_pos_ub=-1,
  170. add_gt_as_proposals=True),
  171. pos_weight=-1,
  172. debug=False),
  173. dict(
  174. assigner=dict(
  175. type=MaxIoUAssigner,
  176. pos_iou_thr=0.7,
  177. neg_iou_thr=0.7,
  178. min_pos_iou=0.7,
  179. match_low_quality=False,
  180. ignore_iof_thr=-1),
  181. sampler=dict(
  182. type=RandomSampler,
  183. num=512,
  184. pos_fraction=0.25,
  185. neg_pos_ub=-1,
  186. add_gt_as_proposals=True),
  187. pos_weight=-1,
  188. debug=False)
  189. ]),
  190. test_cfg=dict(
  191. rpn=dict(
  192. nms_pre=1000,
  193. max_per_img=1000,
  194. nms=dict(type=nms, iou_threshold=0.7),
  195. min_bbox_size=0),
  196. rcnn=dict(
  197. score_thr=0.05,
  198. nms=dict(type=nms, iou_threshold=0.5),
  199. max_per_img=100)))