yolox_x_8xb4-80e_crowdhuman-mot17halftrain_test-mot17halfval.py

_base_ = ['../yolox/yolox_x_8xb8-300e_coco.py']

data_root = 'data/MOT17/'

img_scale = (1440, 800)  # width, height
batch_size = 4

# model settings
model = dict(
    bbox_head=dict(num_classes=1),
    test_cfg=dict(nms=dict(iou_threshold=0.7)),
    init_cfg=dict(
        type='Pretrained',
        checkpoint=  # noqa: E251
        'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth'  # noqa: E501
    ))
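
# Note: the model section above narrows the head to a single 'pedestrian' class,
# relaxes NMS to IoU 0.7 (presumably to cope with heavy overlap in crowded scenes),
# and initialises the detector from the COCO-pretrained YOLOX-X checkpoint in init_cfg.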

train_pipeline = [
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        bbox_clip_border=False),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        bbox_clip_border=False),
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0,
        bbox_clip_border=False),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='Resize',
        scale=img_scale,
        keep_ratio=True,
        clip_object_border=False),
    dict(type='Pad', size_divisor=32, pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='PackDetInputs')
]
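
# Note: the training pipeline above is the standard strong YOLOX recipe
# (Mosaic -> RandomAffine -> MixUp -> HSV jitter -> flip). bbox_clip_border=False and
# clip_object_border=False keep boxes that extend past the image border unclipped,
# and FilterAnnotations then drops boxes smaller than 1x1 px produced by the
# augmentation while still keeping images that end up with no boxes (keep_empty=False).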

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(type='Pad', size_divisor=32, pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
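
# Note: at test time images are resized with the aspect ratio preserved and padded
# to a multiple of 32 so the YOLOX feature strides divide the input size, mirroring
# the padding value (114) used during training.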

train_dataloader = dict(
    _delete_=True,
    batch_size=batch_size,
    num_workers=4,
    persistent_workers=True,
    pin_memory=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='MultiImageMixDataset',
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type='CocoDataset',
                    data_root=data_root,
                    ann_file='annotations/half-train_cocoformat.json',
                    data_prefix=dict(img='train'),
                    filter_cfg=dict(filter_empty_gt=True, min_size=32),
                    metainfo=dict(classes=('pedestrian', )),
                    pipeline=[
                        dict(
                            type='LoadImageFromFile',
                            backend_args=_base_.backend_args),
                        dict(type='LoadAnnotations', with_bbox=True),
                    ]),
                dict(
                    type='CocoDataset',
                    data_root='data/crowdhuman',
                    ann_file='annotations/crowdhuman_train.json',
                    data_prefix=dict(img='train'),
                    filter_cfg=dict(filter_empty_gt=True, min_size=32),
                    metainfo=dict(classes=('pedestrian', )),
                    pipeline=[
                        dict(
                            type='LoadImageFromFile',
                            backend_args=_base_.backend_args),
                        dict(type='LoadAnnotations', with_bbox=True),
                    ]),
                dict(
                    type='CocoDataset',
                    data_root='data/crowdhuman',
                    ann_file='annotations/crowdhuman_val.json',
                    data_prefix=dict(img='val'),
                    filter_cfg=dict(filter_empty_gt=True, min_size=32),
                    metainfo=dict(classes=('pedestrian', )),
                    pipeline=[
                        dict(
                            type='LoadImageFromFile',
                            backend_args=_base_.backend_args),
                        dict(type='LoadAnnotations', with_bbox=True),
                    ]),
            ]),
        pipeline=train_pipeline))
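
# Note: training data is the MOT17 half-train split concatenated with the CrowdHuman
# train and val sets, all remapped to the single 'pedestrian' class. The per-dataset
# pipelines only load images and boxes; Mosaic and MixUp need to sample several images
# at once, which is why the ConcatDataset is wrapped in MultiImageMixDataset and the
# shared train_pipeline is applied at that level.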

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    dataset=dict(
        data_root=data_root,
        ann_file='annotations/half-val_cocoformat.json',
        data_prefix=dict(img='train'),
        metainfo=dict(classes=('pedestrian', )),
        pipeline=test_pipeline))
test_dataloader = val_dataloader
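
# Note: validation runs on the MOT17 half-val annotations; data_prefix still points at
# 'train' because both half splits are carved out of the original MOT17 training
# sequences, and the test dataloader simply reuses the validation setup.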

# training settings
max_epochs = 80
num_last_epochs = 10
interval = 5

train_cfg = dict(max_epochs=max_epochs, val_begin=75, val_interval=1)

# optimizer
# default 8 gpu
base_lr = 0.001 / 8 * batch_size
optim_wrapper = dict(optimizer=dict(lr=base_lr))
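
# Note: the learning rate is linearly scaled against the assumed 8-GPU / batch-size-4
# setup (the "8xb4" in the file name): base_lr = 0.001 / 8 * 4 = 5e-4.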

# learning rate
param_scheduler = [
    dict(
        type='QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=1,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=1,
        T_max=max_epochs - num_last_epochs,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        type='ConstantLR',
        by_epoch=True,
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
    )
]
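
# Note: the schedule above is a quadratic warmup for the first epoch, cosine annealing
# down to 5% of base_lr until epoch 70 (max_epochs - num_last_epochs), then a constant
# lr for the final 10 epochs; convert_to_iter_based updates the lr every iteration
# rather than once per epoch.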

default_hooks = dict(
    checkpoint=dict(
        interval=1,
        max_keep_ckpts=5  # only keep latest 5 checkpoints
    ))

custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49)
]
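
# Note: YOLOXModeSwitchHook turns off the strong Mosaic/MixUp augmentation for the
# last num_last_epochs epochs (in MMDetection's implementation it also enables the
# extra L1 box loss), SyncNormHook synchronises norm-layer running statistics across
# GPUs, and EMAHook tracks an exponential moving average of the weights that is used
# for evaluation and checkpointing.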

# evaluator
val_evaluator = dict(
    ann_file=data_root + 'annotations/half-val_cocoformat.json',
    format_only=False)
test_evaluator = val_evaluator

del _base_.tta_model
del _base_.tta_pipeline
del _base_.train_dataset
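
# Note: the del statements drop the test-time-augmentation model/pipeline and the
# train_dataset inherited from the YOLOX base config, since this file defines its own
# dataloaders. A typical multi-GPU launch (config path assumed here, adjust to where
# this file lives in your checkout):
#   bash ./tools/dist_train.sh \
#       configs/bytetrack/yolox_x_8xb4-80e_crowdhuman-mot17halftrain_test-mot17halfval.py 8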