maskformer_swin-l-p4-w12_64xb1-ms-300e_coco.py

_base_ = './maskformer_r50_ms-16xb1-75e_coco.py'
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa
depths = [2, 2, 18, 2]
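# Replace the R-50 backbone from the base config with Swin-L (patch size 4,
# window size 12, pre-trained on ImageNet-22K at 384x384). `_delete_=True`
# drops the inherited backbone settings instead of merging into them.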
model = dict(
    backbone=dict(
        _delete_=True,
        type='SwinTransformer',
        pretrain_img_size=384,
        embed_dims=192,
        patch_size=4,
        window_size=12,
        mlp_ratio=4,
        depths=depths,
        num_heads=[6, 12, 24, 48],
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.3,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        with_cp=False,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    panoptic_head=dict(
        in_channels=[192, 384, 768, 1536],  # pass to pixel_decoder inside
        pixel_decoder=dict(
            _delete_=True,
            type='PixelDecoder',
            norm_cfg=dict(type='GN', num_groups=32),
            act_cfg=dict(type='ReLU')),
        enforce_decoder_input_project=True))
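# Parameter-wise overrides: normalization layers and the embedding tables
# (absolute position embeddings, relative position bias tables, query
# embeddings) are excluded from weight decay but keep the base learning rate.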
# optimizer
# weight_decay = 0.01
# norm_weight_decay = 0.0
# embed_weight_decay = 0.0
embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
norm_multi = dict(lr_mult=1.0, decay_mult=0.0)
custom_keys = {
    'norm': norm_multi,
    'absolute_pos_embed': embed_multi,
    'relative_position_bias_table': embed_multi,
    'query_embed': embed_multi
}
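# `optim_wrapper` below only overrides lr and weight_decay; the optimizer type
# and its remaining settings are inherited from the base config.
# `norm_decay_mult=0.0` additionally disables weight decay for all norm layers.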
optim_wrapper = dict(
    optimizer=dict(lr=6e-5, weight_decay=0.01),
    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))
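# 300-epoch schedule: linear warmup over the first 1500 iterations, then a
# single 10x LR drop at epoch 250 (MultiStepLR below).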
max_epochs = 300

# learning rate
param_scheduler = [
    dict(
        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[250],
        gamma=0.1)
]

train_cfg = dict(max_epochs=max_epochs)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (64 GPUs) x (1 sample per GPU)
auto_scale_lr = dict(base_batch_size=64)
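# Example launch, assuming the standard MMDetection `tools/` layout (the config
# path below is illustrative):
#   bash tools/dist_train.sh \
#       configs/maskformer/maskformer_swin-l-p4-w12_64xb1-ms-300e_coco.py ${NUM_GPUS}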