123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
# Dataset class name; passed as ``type`` in each dataloader's dataset config.
dataset_type = 'YouTubeVISDataset'
# Root directory of the dataset; its name ends with the dataset year.
data_root = 'data/youtube_vis_2019/'
# Take the year from the end of the root directory name. Trailing slashes are
# stripped first so the result is the same whether or not ``data_root`` ends
# with '/' (a plain ``[-5:-1]`` slice silently yields '_201' without it).
dataset_version = data_root.rstrip('/')[-4:]  # 2019 or 2021

# Forwarded as ``backend_args`` to the ``LoadImageFromFile`` transforms.
backend_args = None
# dataset settings
# Training pipeline: a reference-frame sampling step, a broadcast group of
# per-image transforms, and a final packing step.
train_pipeline = [
    {
        'type': 'UniformRefFrameSample',
        'num_ref_imgs': 1,
        'frame_range': 100,
        'filter_key_img': True,
    },
    {
        'type': 'TransformBroadcaster',
        # NOTE(review): presumably makes the random flip identical across the
        # broadcast images -- confirm against the TransformBroadcaster docs.
        'share_random_params': True,
        'transforms': [
            {'type': 'LoadImageFromFile', 'backend_args': backend_args},
            {'type': 'LoadTrackAnnotations', 'with_mask': True},
            {'type': 'Resize', 'scale': (640, 360), 'keep_ratio': True},
            {'type': 'RandomFlip', 'prob': 0.5},
        ],
    },
    {'type': 'PackTrackInputs'},
]
# Evaluation pipeline: no reference-frame sampling and no random flip. Note
# that annotations are loaded *after* ``Resize`` here, unlike in training.
test_pipeline = [
    {
        'type': 'TransformBroadcaster',
        'transforms': [
            {'type': 'LoadImageFromFile', 'backend_args': backend_args},
            {'type': 'Resize', 'scale': (640, 360), 'keep_ratio': True},
            {'type': 'LoadTrackAnnotations', 'with_mask': True},
        ],
    },
    {'type': 'PackTrackInputs'},
]
# dataloader
# Training dataloader: shuffled sampling with the aspect-ratio batch sampler.
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    # sampler=dict(type='TrackImgSampler'),  # image-based sampling
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='TrackAspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        dataset_version=dataset_version,
        # Build the annotation file name from ``dataset_version`` so it stays
        # in sync with ``data_root`` instead of hard-coding the year.
        ann_file=f'annotations/youtube_vis_{dataset_version}_train.json',
        data_prefix=dict(img_path='train/JPEGImages'),
        pipeline=train_pipeline,
    ),
)
# Validation dataloader: one sample per batch, in order, without shuffling.
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    # NOTE(review): ``round_up=False`` presumably avoids padding the sample
    # list with duplicates during evaluation -- confirm against the
    # DefaultSampler docs.
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        dataset_version=dataset_version,
        # Build the annotation file name from ``dataset_version`` so it stays
        # in sync with ``data_root`` instead of hard-coding the year.
        ann_file=f'annotations/youtube_vis_{dataset_version}_valid.json',
        data_prefix=dict(img_path='valid/JPEGImages'),
        test_mode=True,
        pipeline=test_pipeline,
    ),
)
# Testing reuses the validation dataloader config (same object, not a copy).
test_dataloader = val_dataloader
|