# config_rtmdet.py (15 KB)
  1. auto_scale_lr = dict(base_batch_size=12, enable=False)
  2. backend_args = None
  3. base_lr = 0.001
  4. max_epochs = 350
  5. checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
  6. custom_hooks = [
  7. dict(
  8. ema_type='ExpMomentumEMA',
  9. momentum=0.0002,
  10. priority=49,
  11. type='EMAHook',
  12. update_buffers=True),
  13. dict(
  14. switch_epoch=max_epochs-20,
  15. switch_pipeline=[
  16. dict(backend_args=None, type='LoadImageFromFile'),
  17. dict(type='LoadAnnotations', with_bbox=True),
  18. dict(
  19. keep_ratio=True,
  20. ratio_range=(
  21. 0.5,
  22. 2.0,
  23. ),
  24. scale=(
  25. 640,
  26. 640,
  27. ),
  28. type='RandomResize'),
  29. dict(crop_size=(
  30. 640,
  31. 640,
  32. ), type='RandomCrop'),
  33. dict(type='YOLOXHSVRandomAug'),
  34. dict(prob=0.5, type='RandomFlip'),
  35. dict(
  36. pad_val=dict(img=(
  37. 114,
  38. 114,
  39. 114,
  40. )),
  41. size=(
  42. 640,
  43. 640,
  44. ),
  45. type='Pad'),
  46. dict(type='PackDetInputs'),
  47. ],
  48. type='PipelineSwitchHook'),
  49. ]
  50. data_root = '../../../media/tricolops/T7/Dataset/coco_format_bd/'
  51. dataset_type = 'CocoDataset'
  52. default_hooks = dict(
  53. checkpoint=dict(interval=5, save_best='auto', type='CheckpointHook'),
  54. logger=dict(interval=50, type='LoggerHook'),
  55. param_scheduler=dict(type='ParamSchedulerHook'),
  56. sampler_seed=dict(type='DistSamplerSeedHook'),
  57. timer=dict(type='IterTimerHook'),
  58. visualization=dict(type='DetVisualizationHook'))
  59. default_scope = 'mmdet'
  60. env_cfg = dict(
  61. cudnn_benchmark=False,
  62. dist_cfg=dict(backend='nccl'),
  63. mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
  64. img_scales = [
  65. (
  66. 640,
  67. 640,
  68. ),
  69. (
  70. 320,
  71. 320,
  72. ),
  73. (
  74. 960,
  75. 960,
  76. ),
  77. ]
  78. interval = 5
  79. launcher = 'none'
  80. load_from = None
  81. log_level = 'INFO'
  82. log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
  83. metainfo = dict(
  84. classes=('barcode', ), palette=[
  85. (
  86. 220,
  87. 20,
  88. 60,
  89. ),
  90. ])
  91. model = dict(
  92. backbone=dict(
  93. act_cfg=dict(inplace=True, type='SiLU'),
  94. arch='P5',
  95. channel_attention=True,
  96. deepen_factor=0.33,
  97. expand_ratio=0.5,
  98. norm_cfg=dict(type='SyncBN'),
  99. type='CSPNeXt',
  100. widen_factor=0.5,
  101. init_cfg=dict(type='Pretrained',prefix='backbone.',checkpoint=checkpoint)),
  102. bbox_head=dict(
  103. act_cfg=dict(inplace=True, type='SiLU'),
  104. anchor_generator=dict(
  105. offset=0, strides=[
  106. 8,
  107. 16,
  108. 32,
  109. ], type='MlvlPointGenerator'),
  110. bbox_coder=dict(type='DistancePointBBoxCoder'),
  111. exp_on_reg=False,
  112. feat_channels=128,
  113. in_channels=128,
  114. loss_bbox=dict(loss_weight=1.0, type='SIoULoss'),
  115. loss_cls=dict(
  116. beta=2.0,
  117. loss_weight=2.0,
  118. type='QualityFocalLoss',
  119. use_sigmoid=True),
  120. norm_cfg=dict(type='SyncBN'),
  121. num_classes=1,
  122. pred_kernel_size=1,
  123. share_conv=True,
  124. stacked_convs=2,
  125. type='RTMDetSepBNHead',
  126. with_objectness=False),
  127. data_preprocessor=dict(
  128. batch_augments=None,
  129. bgr_to_rgb=False,
  130. mean=[
  131. 103.53,
  132. 116.28,
  133. 123.675,
  134. ],
  135. std=[
  136. 57.375,
  137. 57.12,
  138. 58.395,
  139. ],
  140. type='DetDataPreprocessor'),
  141. neck=dict(
  142. act_cfg=dict(inplace=True, type='SiLU'),
  143. expand_ratio=0.5,
  144. in_channels=[
  145. 128,
  146. 256,
  147. 512,
  148. ],
  149. norm_cfg=dict(type='SyncBN'),
  150. num_csp_blocks=1,
  151. out_channels=128,
  152. type='CSPNeXtPAFPN'),
  153. test_cfg=dict(
  154. max_per_img=300,
  155. min_bbox_size=0,
  156. nms=dict(iou_threshold=0.65, type='nms'),
  157. nms_pre=30000,
  158. score_thr=0.001),
  159. train_cfg=dict(
  160. allowed_border=-1,
  161. assigner=dict(topk=13, type='DynamicSoftLabelAssigner'),
  162. debug=False,
  163. pos_weight=-1),
  164. type='RTMDet')
  165. optim_wrapper = dict(
  166. clip_grad=dict(max_norm=0.1, norm_type=2),
  167. optimizer=dict(lr=base_lr, type='AdamW', weight_decay=0.05),
  168. paramwise_cfg=dict(
  169. bias_decay_mult=0, bypass_duplicate=True, norm_decay_mult=0),
  170. type='OptimWrapper')
  171. param_scheduler = [
  172. dict(
  173. begin=0, by_epoch=False, end=1000, start_factor=1e-05,
  174. type='LinearLR'),
  175. dict(
  176. T_max=max_epochs // 2,
  177. begin=max_epochs // 2,
  178. by_epoch=True,
  179. convert_to_iter_based=True,
  180. end=max_epochs,
  181. eta_min=base_lr * 0.05,
  182. type='CosineAnnealingLR'),
  183. ]
  184. resume = False
  185. test_cfg = dict(type='TestLoop')
  186. test_dataloader = dict(
  187. batch_size=1,
  188. dataset=dict(
  189. ann_file='Val/Val.json',
  190. backend_args=None,
  191. data_prefix=dict(img='Val/'),
  192. data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
  193. metainfo=dict(classes=('barcode', ), palette=[
  194. (
  195. 220,
  196. 20,
  197. 60,
  198. ),
  199. ]),
  200. pipeline=[
  201. dict(backend_args=None, type='LoadImageFromFile'),
  202. dict(keep_ratio=True, scale=(
  203. 640,
  204. 640,
  205. ), type='Resize'),
  206. dict(
  207. pad_val=dict(img=(
  208. 114,
  209. 114,
  210. 114,
  211. )),
  212. size=(
  213. 640,
  214. 640,
  215. ),
  216. type='Pad'),
  217. dict(type='LoadAnnotations', with_bbox=True),
  218. dict(
  219. meta_keys=(
  220. 'img_id',
  221. 'img_path',
  222. 'ori_shape',
  223. 'img_shape',
  224. 'scale_factor',
  225. ),
  226. type='PackDetInputs'),
  227. ],
  228. test_mode=True,
  229. type='CocoDataset'),
  230. drop_last=False,
  231. num_workers=10,
  232. persistent_workers=True,
  233. sampler=dict(shuffle=False, type='DefaultSampler'))
  234. test_evaluator = dict(
  235. ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val/Val.json',
  236. backend_args=None,
  237. format_only=False,
  238. metric='bbox',
  239. proposal_nums=(
  240. 100,
  241. 1,
  242. 10,
  243. ),
  244. type='CocoMetric')
  245. test_pipeline = [
  246. dict(backend_args=None, type='LoadImageFromFile'),
  247. dict(keep_ratio=True, scale=(
  248. 640,
  249. 640,
  250. ), type='Resize'),
  251. dict(pad_val=dict(img=(
  252. 114,
  253. 114,
  254. 114,
  255. )), size=(
  256. 640,
  257. 640,
  258. ), type='Pad'),
  259. dict(type='LoadAnnotations', with_bbox=True),
  260. dict(
  261. meta_keys=(
  262. 'img_id',
  263. 'img_path',
  264. 'ori_shape',
  265. 'img_shape',
  266. 'scale_factor',
  267. ),
  268. type='PackDetInputs'),
  269. ]
  270. train_cfg = dict(
  271. dynamic_intervals=[
  272. (
  273. 220,
  274. 1,
  275. ),
  276. ],
  277. max_epochs=max_epochs,
  278. type='EpochBasedTrainLoop',
  279. val_interval=1)
  280. train_dataloader = dict(
  281. batch_sampler=None,
  282. batch_size=12,
  283. dataset=dict(
  284. ann_file='Train/Train.json',
  285. backend_args=None,
  286. data_prefix=dict(img='Train/'),
  287. data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
  288. filter_cfg=dict(filter_empty_gt=True, min_size=32),
  289. metainfo=dict(classes=('barcode', ), palette=[
  290. (
  291. 220,
  292. 20,
  293. 60,
  294. ),
  295. ]),
  296. pipeline=[
  297. dict(backend_args=None, type='LoadImageFromFile'),
  298. dict(type='LoadAnnotations', with_bbox=True),
  299. dict(
  300. img_scale=(
  301. 640,
  302. 640,
  303. ),
  304. max_cached_images=20,
  305. pad_val=114.0,
  306. random_pop=False,
  307. type='CachedMosaic'),
  308. dict(
  309. keep_ratio=True,
  310. ratio_range=(
  311. 0.5,
  312. 2.0,
  313. ),
  314. scale=(
  315. 640,
  316. 640,
  317. ),
  318. type='RandomResize'),
  319. dict(crop_size=(
  320. 640,
  321. 640,
  322. ), type='RandomCrop'),
  323. dict(type='YOLOXHSVRandomAug'),
  324. dict(prob=0.5, type='RandomFlip'),
  325. dict(
  326. pad_val=dict(img=(
  327. 114,
  328. 114,
  329. 114,
  330. )),
  331. size=(
  332. 640,
  333. 640,
  334. ),
  335. type='Pad'),
  336. dict(
  337. img_scale=(
  338. 640,
  339. 640,
  340. ),
  341. max_cached_images=10,
  342. pad_val=(
  343. 114,
  344. 114,
  345. 114,
  346. ),
  347. prob=0.5,
  348. random_pop=False,
  349. ratio_range=(
  350. 1.0,
  351. 1.0,
  352. ),
  353. type='CachedMixUp'),
  354. dict(type='PackDetInputs'),
  355. ],
  356. type='CocoDataset'),
  357. num_workers=10,
  358. persistent_workers=True,
  359. pin_memory=True,
  360. sampler=dict(shuffle=True, type='DefaultSampler'))
  361. # train_pipeline = [
  362. # dict(backend_args=None, type='LoadImageFromFile'),
  363. # dict(type='LoadAnnotations', with_bbox=True),
  364. # dict(
  365. # img_scale=(
  366. # 640,
  367. # 640,
  368. # ),
  369. # max_cached_images=20,
  370. # pad_val=114.0,
  371. # random_pop=False,
  372. # type='CachedMosaic'),
  373. # dict(
  374. # keep_ratio=True,
  375. # ratio_range=(
  376. # 0.5,
  377. # 2.0,
  378. # ),
  379. # scale=(
  380. # 1280,
  381. # 1280,
  382. # ),
  383. # type='RandomResize'),
  384. # dict(crop_size=(
  385. # 640,
  386. # 640,
  387. # ), type='RandomCrop'),
  388. # dict(type='YOLOXHSVRandomAug'),
  389. # dict(prob=0.5, type='RandomFlip'),
  390. # dict(pad_val=dict(img=(
  391. # 114,
  392. # 114,
  393. # 114,
  394. # )), size=(
  395. # 640,
  396. # 640,
  397. # ), type='Pad'),
  398. # dict(
  399. # img_scale=(
  400. # 640,
  401. # 640,
  402. # ),
  403. # max_cached_images=10,
  404. # pad_val=(
  405. # 114,
  406. # 114,
  407. # 114,
  408. # ),
  409. # prob=0.5,
  410. # random_pop=False,
  411. # ratio_range=(
  412. # 1.0,
  413. # 1.0,
  414. # ),
  415. # type='CachedMixUp'),
  416. # dict(type='PackDetInputs'),
  417. # ]
  418. # train_pipeline_stage2 = [
  419. # dict(backend_args=None, type='LoadImageFromFile'),
  420. # dict(type='LoadAnnotations', with_bbox=True),
  421. # dict(
  422. # keep_ratio=True,
  423. # ratio_range=(
  424. # 0.5,
  425. # 2.0,
  426. # ),
  427. # scale=(
  428. # 640,
  429. # 640,
  430. # ),
  431. # type='RandomResize'),
  432. # dict(crop_size=(
  433. # 640,
  434. # 640,
  435. # ), type='RandomCrop'),
  436. # dict(type='YOLOXHSVRandomAug'),
  437. # dict(prob=0.5, type='RandomFlip'),
  438. # dict(pad_val=dict(img=(
  439. # 114,
  440. # 114,
  441. # 114,
  442. # )), size=(
  443. # 640,
  444. # 640,
  445. # ), type='Pad'),
  446. # dict(type='PackDetInputs'),
  447. # ]
  448. tta_model = dict(
  449. tta_cfg=dict(max_per_img=100, nms=dict(iou_threshold=0.6, type='nms')),
  450. type='DetTTAModel')
  451. tta_pipeline = [
  452. dict(backend_args=None, type='LoadImageFromFile'),
  453. dict(
  454. transforms=[
  455. [
  456. dict(keep_ratio=True, scale=(
  457. 640,
  458. 640,
  459. ), type='Resize'),
  460. dict(keep_ratio=True, scale=(
  461. 320,
  462. 320,
  463. ), type='Resize'),
  464. dict(keep_ratio=True, scale=(
  465. 960,
  466. 960,
  467. ), type='Resize'),
  468. ],
  469. [
  470. dict(prob=1.0, type='RandomFlip'),
  471. dict(prob=0.0, type='RandomFlip'),
  472. ],
  473. [
  474. dict(
  475. pad_val=dict(img=(
  476. 114,
  477. 114,
  478. 114,
  479. )),
  480. size=(
  481. 960,
  482. 960,
  483. ),
  484. type='Pad'),
  485. ],
  486. [
  487. dict(type='LoadAnnotations', with_bbox=True),
  488. ],
  489. [
  490. dict(
  491. meta_keys=(
  492. 'img_id',
  493. 'img_path',
  494. 'ori_shape',
  495. 'img_shape',
  496. 'scale_factor',
  497. 'flip',
  498. 'flip_direction',
  499. ),
  500. type='PackDetInputs'),
  501. ],
  502. ],
  503. type='TestTimeAug'),
  504. ]
  505. val_cfg = dict(type='ValLoop')
  506. val_dataloader = dict(
  507. batch_size=1,
  508. dataset=dict(
  509. ann_file='Val/Val.json',
  510. backend_args=None,
  511. data_prefix=dict(img='Val/'),
  512. data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
  513. metainfo=dict(classes=('barcode', ), palette=[
  514. (
  515. 220,
  516. 20,
  517. 60,
  518. ),
  519. ]),
  520. pipeline=[
  521. dict(backend_args=None, type='LoadImageFromFile'),
  522. dict(keep_ratio=True, scale=(
  523. 640,
  524. 640,
  525. ), type='Resize'),
  526. dict(
  527. pad_val=dict(img=(
  528. 114,
  529. 114,
  530. 114,
  531. )),
  532. size=(
  533. 640,
  534. 640,
  535. ),
  536. type='Pad'),
  537. dict(type='LoadAnnotations', with_bbox=True),
  538. dict(
  539. meta_keys=(
  540. 'img_id',
  541. 'img_path',
  542. 'ori_shape',
  543. 'img_shape',
  544. 'scale_factor',
  545. ),
  546. type='PackDetInputs'),
  547. ],
  548. test_mode=True,
  549. type='CocoDataset'),
  550. drop_last=False,
  551. num_workers=10,
  552. persistent_workers=True,
  553. sampler=dict(shuffle=False, type='DefaultSampler'))
  554. val_evaluator = dict(
  555. ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val/Val.json',
  556. backend_args=None,
  557. format_only=False,
  558. metric='bbox',
  559. proposal_nums=(
  560. 100,
  561. 1,
  562. 10,
  563. ),
  564. type='CocoMetric')
  565. vis_backends = [
  566. dict(type='LocalVisBackend'),
  567. ]
  568. visualizer = dict(
  569. name='visualizer',
  570. type='DetLocalVisualizer',
  571. vis_backends=[
  572. dict(type='LocalVisBackend'),
  573. ])
  574. work_dir = './work_dirs/rtmdet_small_1xb16-300e_coco'