# MMDetection (default_scope='mmdet') config for an 'RtDetr' detector with a
# RepVGG-A0 backbone and a HybridEncoder neck, trained on a single-class
# ('barcode') COCO-format dataset using grayscale images resized to 480x480.

# ---------------------------------------------------------------------------
# Shared values referenced further down in this file
# ---------------------------------------------------------------------------
auto_scale_lr = dict(base_batch_size=32)
backend_args = None
# (height, width) every image is resized to; also reused as the
# `eval_spatial_size` of the neck and the decoder.
image_size = (480, 480)
# NOTE(review): `pretrained` is not referenced anywhere else in this file, and
# the URL names a resnet34vd checkpoint while the backbone below is RepVGG --
# confirm whether it is still needed.
pretrained = 'https://github.com/flytocc/mmdetection/releases/download/model_zoo/resnet34vd_pretrained_f6a72dc5.pth'
data_root = '../../../media/tricolops/T7/Dataset/coco_format_bd/'
dataset_type = 'CocoDataset'
# One Resize variant per interpolation mode is built in `train_pipeline`.
interpolations = ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']

# ---------------------------------------------------------------------------
# Runtime: hooks, environment, logging
# ---------------------------------------------------------------------------
default_hooks = dict(
    checkpoint=dict(interval=5, save_best='auto', type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49)
]
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
max_epochs = 300
# Dataset meta information shared by the train / val / test datasets.
metainfo = dict(
    classes=('barcode', ),
    palette=[
        (220, 20, 60),
    ])

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = dict(
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                random_size_range=(384, 576),
                size_divisor=32,
                interval=1)
        ]),
    backbone=dict(
        arch="A0",
        # Single input channel: the pipelines load images as grayscale.
        in_channels=1,
        base_channels=64,
        out_indices=(1, 2, 3),
        strides=(1, 2, 2, 2),
        dilations=(1, 1, 1, 1),
        frozen_stages=-1,
        norm_cfg=dict(requires_grad=True, type='BN'),
        # grad will update when not using pretrained
        norm_eval=False,
        type='RepVGG'),
    bbox_head=dict(
        num_classes=1,
        embed_dims=256,
        # same as hidden_dim in the decoder and the HybridEncoder
        num_reg_fcs=2,
        sync_cls_avg_factor=False,
        loss_cls=dict(
            type='VarifocalLoss',
            use_sigmoid=True,
            alpha=0.75,
            gamma=2.0,
            with_logits=True,
            iou_weighted=True,
            reduction='mean',
            loss_weight=1),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
        # NOTE(review): num_pred_layer is 6 while the decoder below uses
        # number_decoder_layer=3 -- confirm these are meant to differ.
        num_pred_layer=6,
        type='RTDETRHead'),
    decoder=dict(
        num_classes=1,
        hidden_dim=256,
        num_queries=300,
        position_type='sine',
        feat_channels=[256, 256, 256],
        # after neck it should be 256
        feat_strides=[8, 16, 32],
        # keep consistent with the encoder (neck) feat_strides
        num_levels=3,
        num_crossattn_points=8,
        # change cross attn points
        number_head=8,
        number_decoder_layer=3,
        # original note: 4 for (resnet)34 and 6 for (resnet)50
        dim_feedforward_ratio=4,
        dropout=0.0,
        act_cfg=dict(type='ReLU', inplace=True),
        num_denoising=100,
        label_noise_ratio=0.5,
        box_noise_scale=1.0,
        learnt_init_query=True,
        eval_idx=-1,
        # Derived from image_size so the two cannot drift apart.
        eval_spatial_size=list(image_size),
        eps=1e-2),
    neck=dict(
        in_channels=[128, 256, 512],
        # same as the feature channels coming from the backbone
        # in_channels=[128,256,512] for resnet34 and [512,1024,2048] for resnet50
        feat_strides=[8, 16, 32],
        hidden_dim=256,
        n_head=8,
        dim_feedforward_ratio=4,
        drop_out=0.0,
        enc_act=dict(type='GELU'),
        use_encoder_idx=[2],
        num_encoder_layers=1,
        pe_temperature=100 * 100,
        with_spd=False,
        norm_cfg=dict(type='BN', requires_grad=True),
        widen_factor=0.5,
        deepen_factor=1,
        input_proj_cfg=dict(
            type='ChannelMapper',
            in_channels=[128, 256, 512],
            # same as input channel in encoder
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='BN', requires_grad=True),
            num_outs=3),
        # Derived from image_size so the two cannot drift apart.
        eval_spatial_size=list(image_size),
        act_cfg=dict(type='SiLU', inplace=True),
        type='HybridEncoder'),
    num_queries=300,
    test_cfg=dict(max_per_img=50),
    train_cfg=dict(
        assigner=dict(
            match_costs=[
                dict(type='FocalLossCost', weight=2.0),
                dict(box_format='xywh', type='BBoxL1Cost', weight=5.0),
                dict(iou_mode='giou', type='IoUCost', weight=2.0),
            ],
            type='HungarianAssigner')),
    type='RtDetr',
)

# ---------------------------------------------------------------------------
# Optimisation
# ---------------------------------------------------------------------------
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2),
    optimizer=dict(lr=0.0004, type='AdamW', weight_decay=0.0001),
    paramwise_cfg=dict(
        custom_keys={'backbone': dict(lr_mult=0.1)},
        norm_decay_mult=0,
        bypass_duplicate=True),
    type='OptimWrapper')
# Only an iteration-based linear warm-up (first 2000 iterations) is scheduled.
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=0.001,
        by_epoch=False,
        begin=0,
        end=2000)
]
resume = False

# ---------------------------------------------------------------------------
# Test
# ---------------------------------------------------------------------------
test_cfg = dict(type='TestLoop')
test_pipeline = [
    dict(
        backend_args=backend_args,
        color_type='grayscale',
        type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        scale=image_size,
        keep_ratio=False,
        # interpolation='bicubic'
    ),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val_resize/Val_resize.json',
        backend_args=backend_args,
        data_prefix=dict(img='Val_resize/'),
        data_root=data_root,
        metainfo=metainfo,
        pipeline=test_pipeline,
        test_mode=True,
        type=dataset_type),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file=data_root + 'Val_resize/Val_resize.json',
    backend_args=backend_args,
    format_only=False,
    metric='bbox',
    type='CocoMetric')

# ---------------------------------------------------------------------------
# Train
# ---------------------------------------------------------------------------
train_cfg = dict(
    max_epochs=max_epochs, type='EpochBasedTrainLoop', val_interval=1)
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        color_type='grayscale',
        backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    # Photometric augmentation is currently disabled:
    # dict(
    #     type='RandomApply',
    #     transforms=dict(type='PhotoMetricDistortion'),
    #     prob=0.8),
    dict(
        type='RandomApply',
        transforms=dict(type='MinIoURandomCrop'),
        prob=0.8),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='RandomFlip', prob=0.5),
    # One single-transform branch per entry of `interpolations`; each branch
    # resizes to `image_size` with a different interpolation mode.
    dict(
        type='RandomChoice',
        transforms=[[
            dict(
                type='Resize',
                scale=image_size,
                keep_ratio=False,
                interpolation=interpolation)
        ] for interpolation in interpolations]),
    dict(type='PackDetInputs')
]
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=16,
    dataset=dict(
        ann_file='Train_resize/Train_resize.json',
        backend_args=backend_args,
        data_prefix=dict(img='Train_resize/'),
        data_root=data_root,
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        metainfo=metainfo,
        pipeline=train_pipeline,
        type=dataset_type),
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))

# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val_resize/Val_resize.json',
        backend_args=backend_args,
        data_prefix=dict(img='Val_resize/'),
        data_root=data_root,
        metainfo=metainfo,
        # NOTE(review): unlike `test_pipeline`, Resize runs *before*
        # LoadAnnotations here -- confirm the different order is intended.
        pipeline=[
            dict(
                backend_args=backend_args,
                color_type="grayscale",
                type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=image_size, type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type=dataset_type),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file=data_root + 'Val_resize/Val_resize.json',
    backend_args=backend_args,
    format_only=False,
    metric='bbox',
    type='CocoMetric')

# ---------------------------------------------------------------------------
# Visualisation and output
# ---------------------------------------------------------------------------
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=vis_backends)
work_dir = './work_dirs/rtdetrrepvgg'