# bd_maskrcnn_swinw7d4_b2_50e.py

  1. # '../_base_/models/mask_rcnn_r50_fpn.py'
  2. # '../swin/mask-rcnn_swin-t-p4-w7_fpn_1x_coco.py'
  3. # model settings
  4. model = dict(
  5. type='MaskRCNN',
  6. data_preprocessor=dict(
  7. type='DetDataPreprocessor',
  8. mean=[123.675, 116.28, 103.53],
  9. std=[58.395, 57.12, 57.375],
  10. bgr_to_rgb=True,
  11. pad_mask=True,
  12. pad_size_divisor=32),
  13. backbone=dict(
  14. type='SwinTransformer',
  15. embed_dims=96,
  16. depths=[2, 2, 6, 2],
  17. num_heads=[3, 6, 12, 24],
  18. window_size=7,
  19. mlp_ratio=4,
  20. qkv_bias=True,
  21. qk_scale=None,
  22. drop_rate=0.,
  23. attn_drop_rate=0.,
  24. drop_path_rate=0.2,
  25. patch_norm=True,
  26. out_indices=(0, 1, 2, 3),
  27. with_cp=False,
  28. convert_weights=True,
  29. ),
  30. neck=dict(
  31. type='FPN',
  32. in_channels=[96, 192, 384, 768],
  33. out_channels=256,
  34. num_outs=5),
  35. rpn_head=dict(
  36. type='RPNHead',
  37. in_channels=256,
  38. feat_channels=256,
  39. anchor_generator=dict(
  40. type='AnchorGenerator',
  41. scales=[8],
  42. ratios=[0.5, 1.0, 2.0],
  43. strides=[4, 8, 16, 32, 64]),
  44. bbox_coder=dict(
  45. type='DeltaXYWHBBoxCoder',
  46. target_means=[.0, .0, .0, .0],
  47. target_stds=[1.0, 1.0, 1.0, 1.0]),
  48. loss_cls=dict(
  49. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
  50. loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
  51. roi_head=dict(
  52. type='StandardRoIHead',
  53. bbox_roi_extractor=dict(
  54. type='SingleRoIExtractor',
  55. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  56. out_channels=256,
  57. featmap_strides=[4, 8, 16, 32]),
  58. bbox_head=dict(
  59. type='Shared2FCBBoxHead',
  60. in_channels=256,
  61. fc_out_channels=1024,
  62. roi_feat_size=7,
  63. num_classes=1,
  64. bbox_coder=dict(
  65. type='DeltaXYWHBBoxCoder',
  66. target_means=[0., 0., 0., 0.],
  67. target_stds=[0.1, 0.1, 0.2, 0.2]),
  68. reg_class_agnostic=False,
  69. loss_cls=dict(
  70. type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
  71. loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
  72. mask_roi_extractor=dict(
  73. type='SingleRoIExtractor',
  74. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  75. out_channels=256,
  76. featmap_strides=[4, 8, 16, 32]),
  77. mask_head=dict(
  78. type='FCNMaskHead',
  79. num_convs=4,
  80. in_channels=256,
  81. conv_out_channels=256,
  82. num_classes=1,
  83. loss_mask=dict(
  84. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
  85. # model training and testing settings
  86. train_cfg=dict(
  87. rpn=dict(
  88. assigner=dict(
  89. type='MaxIoUAssigner',
  90. pos_iou_thr=0.7,
  91. neg_iou_thr=0.3,
  92. min_pos_iou=0.3,
  93. match_low_quality=True,
  94. ignore_iof_thr=-1),
  95. sampler=dict(
  96. type='RandomSampler',
  97. num=256,
  98. pos_fraction=0.5,
  99. neg_pos_ub=-1,
  100. add_gt_as_proposals=False),
  101. allowed_border=-1,
  102. pos_weight=-1,
  103. debug=False),
  104. rpn_proposal=dict(
  105. nms_pre=2000,
  106. max_per_img=1000,
  107. nms=dict(type='nms', iou_threshold=0.7),
  108. min_bbox_size=0),
  109. rcnn=dict(
  110. assigner=dict(
  111. type='MaxIoUAssigner',
  112. pos_iou_thr=0.5,
  113. neg_iou_thr=0.5,
  114. min_pos_iou=0.5,
  115. match_low_quality=True,
  116. ignore_iof_thr=-1),
  117. sampler=dict(
  118. type='RandomSampler',
  119. num=512,
  120. pos_fraction=0.25,
  121. neg_pos_ub=-1,
  122. add_gt_as_proposals=True),
  123. mask_size=28,
  124. pos_weight=-1,
  125. debug=False)),
  126. test_cfg=dict(
  127. rpn=dict(
  128. nms_pre=1000,
  129. max_per_img=1000,
  130. nms=dict(type='nms', iou_threshold=0.7),
  131. min_bbox_size=0),
  132. rcnn=dict(
  133. score_thr=0.05,
  134. nms=dict(type='nms', iou_threshold=0.5),
  135. max_per_img=100,
  136. mask_thr_binary=0.5)))
  137. # '../_base_/datasets/coco_instance.py',
  138. # dataset settings
  139. dataset_type = 'CocoDataset'
  140. data_root = 'barcode_coco'
  141. metainfo = {
  142. 'classes': ('barcode',),
  143. 'palette': [
  144. (220, 20, 60),
  145. ]
  146. }
  147. img_norm_cfg = dict(
  148. mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
  149. backend_args = None
  150. train_pipeline = [
  151. dict(type='LoadImageFromFile', backend_args=backend_args),
  152. dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
  153. dict(type='Resize', scale=(1333, 800), keep_ratio=True),
  154. dict(type='RandomFlip', prob=0.5),
  155. dict(type='PackDetInputs')
  156. ]
  157. test_pipeline = [
  158. dict(type='LoadImageFromFile', backend_args=backend_args),
  159. dict(type='Resize', scale=(1333, 800), keep_ratio=True),
  160. # If you don't have a gt annotation, delete the pipeline
  161. dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
  162. dict(
  163. type='PackDetInputs',
  164. meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
  165. 'scale_factor'))
  166. ]
  167. train_dataloader = dict(
  168. batch_size=1,
  169. num_workers=2,
  170. persistent_workers=True,
  171. sampler=dict(type='DefaultSampler', shuffle=True),
  172. batch_sampler=dict(type='AspectRatioBatchSampler'),
  173. dataset=dict(
  174. type=dataset_type,
  175. data_root=data_root,
  176. metainfo=metainfo,
  177. ann_file='train.json',
  178. data_prefix=dict(img=''),
  179. filter_cfg=dict(filter_empty_gt=True, min_size=32),
  180. pipeline=train_pipeline,
  181. backend_args=backend_args))
  182. val_dataloader = dict(
  183. batch_size=1,
  184. num_workers=2,
  185. persistent_workers=True,
  186. drop_last=False,
  187. sampler=dict(type='DefaultSampler', shuffle=False),
  188. dataset=dict(
  189. type=dataset_type,
  190. data_root=data_root,
  191. metainfo=metainfo,
  192. ann_file='val.json',
  193. data_prefix=dict(img=''),
  194. test_mode=True,
  195. pipeline=test_pipeline,
  196. backend_args=backend_args))
  197. test_dataloader = val_dataloader
  198. val_evaluator = dict(
  199. type='CocoMetric',
  200. ann_file=data_root + '/val.json',
  201. metric=['bbox', 'segm'],
  202. format_only=False,
  203. backend_args=backend_args)
  204. test_evaluator = val_evaluator
  205. # '../_base_/schedules/schedule_1x.py'
  206. # optimizer
  207. # optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
  208. # training schedule for 1x
  209. train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=50, val_interval=5)
  210. val_cfg = dict(type='ValLoop')
  211. test_cfg = dict(type='TestLoop')
  212. # learning rate
  213. param_scheduler = [
  214. dict(
  215. type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
  216. dict(
  217. type='MultiStepLR',
  218. begin=0,
  219. end=12,
  220. by_epoch=True,
  221. milestones=[8, 11],
  222. gamma=0.1)
  223. ]
  224. # optimizer
  225. optim_wrapper = dict(
  226. type='OptimWrapper',
  227. optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
  228. # Default setting for scaling LR automatically
  229. # - `enable` means enable scaling LR automatically
  230. # or not by default.
  231. # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
  232. auto_scale_lr = dict(enable=False, base_batch_size=2)
  233. #'../_base_/default_runtime.py'
  234. default_scope = 'mmdet'
  235. default_hooks = dict(
  236. timer=dict(type='IterTimerHook'),
  237. logger=dict(type='LoggerHook', interval=50),
  238. param_scheduler=dict(type='ParamSchedulerHook'),
  239. checkpoint=dict(type='CheckpointHook', interval=1),
  240. sampler_seed=dict(type='DistSamplerSeedHook'),
  241. visualization=dict(type='DetVisualizationHook'))
  242. env_cfg = dict(
  243. cudnn_benchmark=False,
  244. mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
  245. dist_cfg=dict(backend='nccl'),
  246. )
  247. vis_backends = [dict(type='LocalVisBackend')]
  248. visualizer = dict(
  249. type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
  250. log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
  251. log_level = 'INFO'
  252. load_from = None
  253. resume = False
  254. # lr_config = dict(warmup_iters=1000, step=[8, 11])
  255. # change data dir
  256. # change data processor default as pth style
  257. # model = dict(
  258. # # use caffe img_norm
  259. # data_preprocessor=dict(
  260. # mean=[103.530, 116.280, 123.675],
  261. # std=[1.0, 1.0, 1.0],
  262. # bgr_to_rgb=False))
  263. # #train data change