# MMDetection-style config: an 'RtDetr' detector with a 'YOLOv8CSPDarknet'
# backbone and a 'HybridEncoder' neck, trained on a single-class ('barcode')
# COCO-format dataset. Images are loaded as grayscale and resized to 640x640.
#
# Shared values (data_root, dataset_type, metainfo, backend_args,
# vis_backends) are declared once and referenced below, so editing one of
# them updates every dataloader/evaluator consistently.

auto_scale_lr = dict(base_batch_size=32)
backend_args = None
# NOTE(review): `pretrained` is not referenced anywhere in this file and the
# URL points at a ResNet34-vd checkpoint while the backbone below is
# 'YOLOv8CSPDarknet' -- kept for backward compatibility, confirm it is needed.
pretrained = 'https://github.com/flytocc/mmdetection/releases/download/model_zoo/resnet34vd_pretrained_f6a72dc5.pth'  # noqa: E501
data_root = '../../../media/tricolops/T7/Dataset/coco_format_bd/'
dataset_type = 'CocoDataset'
# One 'Resize' transform per interpolation mode is generated in
# train_pipeline and wrapped in a 'RandomChoice'.
interpolations = ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']

default_hooks = dict(
    checkpoint=dict(interval=5, save_best='auto', type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49),
]
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
max_epochs = 500
metainfo = dict(classes=('barcode', ), palette=[(220, 20, 60)])

model = dict(
    backbone=dict(
        last_stage_out_channels=768,
        plugins=None,
        deepen_factor=0.67,
        widen_factor=0.75,
        input_channels=1,  # grayscale input, see color_type in the pipelines
        out_indices=(2, 3, 4),
        type='YOLOv8CSPDarknet'),
    bbox_head=dict(
        num_classes=1,
        # Same as the hidden dims in the decoder and the hybrid encoder.
        embed_dims=256,
        num_reg_fcs=2,
        sync_cls_avg_factor=False,
        loss_cls=dict(
            type='VarifocalLoss',
            use_sigmoid=True,
            alpha=0.75,
            gamma=2.0,
            with_logits=True,
            iou_weighted=True,
            reduction='mean',
            loss_weight=1),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
        num_pred_layer=6,
        type='RTDETRHead'),
    data_preprocessor=dict(pad_size_divisor=1, type='DetDataPreprocessor'),
    decoder=dict(
        num_classes=1,
        hidden_dim=256,
        num_queries=300,
        position_type='sine',
        # After the neck every level should have 256 channels.
        feat_channels=[256, 256, 256],
        # Keep in sync with the encoder (neck) feat_strides.
        feat_strides=[8, 16, 32],
        num_levels=3,
        num_crossattn_points=4,
        number_head=8,
        # Original note: "4 for 34 and 6 for 50" (presumably the
        # ResNet-34 / ResNet-50 model variants).
        number_decoder_layer=4,
        dim_feedforward_ratio=4,
        dropout=0.0,
        act_cfg=dict(type='ReLU', inplace=True),
        num_denoising=100,
        label_noise_ratio=0.5,
        box_noise_scale=1.0,
        learnt_init_query=True,
        eval_idx=-1,
        eval_spatial_size=[640, 640],
        eps=1e-2),
    neck=dict(
        # Same as the feature channels coming from the backbone.
        # Reference values from the original note: [128, 256, 512] for
        # resnet34 and [512, 1024, 2048] for resnet50.
        # NOTE(review): the original comment said "last four layer", but
        # out_indices above selects three stages -- confirm.
        in_channels=[192, 384, 576],
        feat_strides=[8, 16, 32],
        hidden_dim=256,
        n_head=8,
        dim_feedforward_ratio=4,
        drop_out=0.0,
        enc_act=dict(type='GELU'),
        use_encoder_idx=[2],
        num_encoder_layers=1,
        pe_temperature=100 * 100,
        norm_cfg=dict(type='BN', requires_grad=True),
        widen_factor=0.5,
        deepen_factor=1,
        input_proj_cfg=dict(
            type='ChannelMapper',
            # Same as the encoder's in_channels above.
            in_channels=[192, 384, 576],
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='BN', requires_grad=True),
            num_outs=3),
        eval_spatial_size=[640, 640],
        act_cfg=dict(type='SiLU', inplace=True),
        type='HybridEncoder'),
    num_queries=300,
    test_cfg=dict(max_per_img=100),
    train_cfg=dict(
        assigner=dict(
            match_costs=[
                dict(type='FocalLossCost', weight=2.0),
                dict(box_format='xywh', type='BBoxL1Cost', weight=5.0),
                dict(iou_mode='giou', type='IoUCost', weight=2.0),
            ],
            type='HungarianAssigner')),
    type='RtDetr')

optim_wrapper = dict(
    clip_grad=dict(max_norm=10, norm_type=2),
    optimizer=dict(
        type='SGD',
        lr=0.0025,
        momentum=0.937,
        weight_decay=0.0005,
        nesterov=True),
    type='OptimWrapper')
# NOTE(review): with by_epoch=False the begin/end bounds are presumably
# counted in iterations, so end=max_epochs (500) looks like a coincidental
# reuse of the epoch count as a warm-up length -- confirm the intent.
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=0.001,
        by_epoch=False,
        begin=0,
        end=max_epochs),
]
resume = False

test_cfg = dict(type='TestLoop')
test_pipeline = [
    dict(
        backend_args=backend_args,
        color_type='grayscale',
        type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        scale=(640, 640),
        keep_ratio=False,
        interpolation='bicubic'),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val/Val.json',
        backend_args=backend_args,
        data_prefix=dict(img='Val/'),
        data_root=data_root,
        metainfo=metainfo,
        pipeline=test_pipeline,
        test_mode=True,
        type=dataset_type),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file=data_root + 'Val/Val.json',
    backend_args=backend_args,
    format_only=False,
    metric='bbox',
    type='CocoMetric')

train_cfg = dict(
    max_epochs=max_epochs, type='EpochBasedTrainLoop', val_interval=1)
train_pipeline = [
    dict(
        type='LoadImageFromFile',
        color_type='grayscale',
        backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    # Photometric augmentation is currently disabled:
    # dict(
    #     type='RandomApply',
    #     transforms=dict(type='PhotoMetricDistortion'),
    #     prob=0.8),
    dict(
        type='RandomApply',
        transforms=dict(type='MinIoURandomCrop'),
        prob=0.8),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        # One single-transform branch per interpolation mode.
        transforms=[[
            dict(
                type='Resize',
                scale=(640, 640),
                keep_ratio=False,
                interpolation=interpolation)
        ] for interpolation in interpolations]),
    dict(type='PackDetInputs'),
]
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=12,
    dataset=dict(
        ann_file='Train/Train.json',
        backend_args=backend_args,
        data_prefix=dict(img='Train/'),
        data_root=data_root,
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        metainfo=metainfo,
        pipeline=train_pipeline,
        type=dataset_type),
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))

val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val/Val.json',
        backend_args=backend_args,
        data_prefix=dict(img='Val/'),
        data_root=data_root,
        metainfo=metainfo,
        # NOTE(review): this pipeline is not test_pipeline -- Resize has no
        # explicit interpolation here and runs before LoadAnnotations.
        # Confirm the difference between validation and test is intended.
        pipeline=[
            dict(
                backend_args=backend_args,
                color_type='grayscale',
                type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=(640, 640), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type=dataset_type),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file=data_root + 'Val/Val.json',
    backend_args=backend_args,
    format_only=False,
    metric='bbox',
    type='CocoMetric')

vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=vis_backends)
work_dir = './work_dirs/rtdetr_yolo'