mot2coco.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. # This script converts MOT labels into COCO style.
  3. # Official website of the MOT dataset: https://motchallenge.net/
  4. #
  5. # Label format of MOT dataset:
  6. # GTs:
  7. # <frame_id> # starts from 1 but COCO style starts from 0,
  8. # <instance_id>, <x1>, <y1>, <w>, <h>,
  9. # <conf> # conf is annotated as 0 if the object is ignored,
  10. # <class_id>, <visibility>
  11. #
  12. # DETs and Results:
  13. # <frame_id>, <instance_id>, <x1>, <y1>, <w>, <h>, <conf>,
  14. # <x>, <y>, <z> # for 3D objects
  15. import argparse
  16. import os
  17. import os.path as osp
  18. from collections import defaultdict
  19. import mmengine
  20. import numpy as np
  21. from tqdm import tqdm
  22. # Classes in MOT:
  23. CLASSES = [
  24. dict(id=1, name='pedestrian'),
  25. dict(id=2, name='person_on_vehicle'),
  26. dict(id=3, name='car'),
  27. dict(id=4, name='bicycle'),
  28. dict(id=5, name='motorbike'),
  29. dict(id=6, name='non_mot_vehicle'),
  30. dict(id=7, name='static_person'),
  31. dict(id=8, name='distractor'),
  32. dict(id=9, name='occluder'),
  33. dict(id=10, name='occluder_on_ground'),
  34. dict(id=11, name='occluder_full'),
  35. dict(id=12, name='reflection'),
  36. dict(id=13, name='crowd')
  37. ]
  38. def parse_args():
  39. parser = argparse.ArgumentParser(
  40. description='Convert MOT label and detections to COCO-VID format.')
  41. parser.add_argument('-i', '--input', help='path of MOT data')
  42. parser.add_argument(
  43. '-o', '--output', help='path to save coco formatted label file')
  44. parser.add_argument(
  45. '--convert-det',
  46. action='store_true',
  47. help='convert official detection results.')
  48. parser.add_argument(
  49. '--split-train',
  50. action='store_true',
  51. help='split the train set into half-train and half-validate.')
  52. return parser.parse_args()
  53. def parse_gts(gts, is_mot15):
  54. outputs = defaultdict(list)
  55. for gt in gts:
  56. gt = gt.strip().split(',')
  57. frame_id, ins_id = map(int, gt[:2])
  58. bbox = list(map(float, gt[2:6]))
  59. if is_mot15:
  60. conf = 1.
  61. category_id = 1
  62. visibility = 1.
  63. else:
  64. conf = float(gt[6])
  65. category_id = int(gt[7])
  66. visibility = float(gt[8])
  67. anns = dict(
  68. category_id=category_id,
  69. bbox=bbox,
  70. area=bbox[2] * bbox[3],
  71. iscrowd=False,
  72. visibility=visibility,
  73. mot_instance_id=ins_id,
  74. mot_conf=conf)
  75. outputs[frame_id].append(anns)
  76. return outputs
  77. def parse_dets(dets):
  78. outputs = defaultdict(list)
  79. for det in dets:
  80. det = det.strip().split(',')
  81. frame_id, ins_id = map(int, det[:2])
  82. assert ins_id == -1
  83. bbox = list(map(float, det[2:7]))
  84. # [x1, y1, x2, y2] to be consistent with mmdet
  85. bbox = [
  86. bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3], bbox[4]
  87. ]
  88. outputs[frame_id].append(bbox)
  89. return outputs
  90. def main():
  91. args = parse_args()
  92. if not osp.isdir(args.output):
  93. os.makedirs(args.output)
  94. sets = ['train', 'test']
  95. if args.split_train:
  96. sets += ['half-train', 'half-val']
  97. vid_id, img_id, ann_id = 1, 1, 1
  98. for subset in sets:
  99. ins_id = 0
  100. print(f'Converting {subset} set to COCO format')
  101. if 'half' in subset:
  102. in_folder = osp.join(args.input, 'train')
  103. else:
  104. in_folder = osp.join(args.input, subset)
  105. out_file = osp.join(args.output, f'{subset}_cocoformat.json')
  106. outputs = defaultdict(list)
  107. outputs['categories'] = CLASSES
  108. if args.convert_det:
  109. det_file = osp.join(args.output, f'{subset}_detections.pkl')
  110. detections = dict(det_bboxes=dict())
  111. video_names = os.listdir(in_folder)
  112. for video_name in tqdm(video_names):
  113. # basic params
  114. parse_gt = 'test' not in subset
  115. ins_maps = dict()
  116. # load video infos
  117. video_folder = osp.join(in_folder, video_name)
  118. infos = mmengine.list_from_file(f'{video_folder}/seqinfo.ini')
  119. # video-level infos
  120. assert video_name == infos[1].strip().split('=')[1]
  121. img_folder = infos[2].strip().split('=')[1]
  122. img_names = os.listdir(f'{video_folder}/{img_folder}')
  123. img_names = sorted(img_names)
  124. fps = int(infos[3].strip().split('=')[1])
  125. num_imgs = int(infos[4].strip().split('=')[1])
  126. assert num_imgs == len(img_names)
  127. width = int(infos[5].strip().split('=')[1])
  128. height = int(infos[6].strip().split('=')[1])
  129. video = dict(
  130. id=vid_id,
  131. name=video_name,
  132. fps=fps,
  133. width=width,
  134. height=height)
  135. # parse annotations
  136. if parse_gt:
  137. gts = mmengine.list_from_file(f'{video_folder}/gt/gt.txt')
  138. if 'MOT15' in video_folder:
  139. img2gts = parse_gts(gts, True)
  140. else:
  141. img2gts = parse_gts(gts, False)
  142. if args.convert_det:
  143. dets = mmengine.list_from_file(f'{video_folder}/det/det.txt')
  144. img2dets = parse_dets(dets)
  145. # make half sets
  146. if 'half' in subset:
  147. split_frame = num_imgs // 2 + 1
  148. if 'train' in subset:
  149. img_names = img_names[:split_frame]
  150. elif 'val' in subset:
  151. img_names = img_names[split_frame:]
  152. else:
  153. raise ValueError(
  154. 'subset must be named with `train` or `val`')
  155. mot_frame_ids = [str(int(_.split('.')[0])) for _ in img_names]
  156. with open(f'{video_folder}/gt/gt_{subset}.txt', 'wt') as f:
  157. for gt in gts:
  158. if gt.split(',')[0] in mot_frame_ids:
  159. f.writelines(f'{gt}\n')
  160. # image and box level infos
  161. for frame_id, name in enumerate(img_names):
  162. img_name = osp.join(video_name, img_folder, name)
  163. mot_frame_id = int(name.split('.')[0])
  164. image = dict(
  165. id=img_id,
  166. video_id=vid_id,
  167. file_name=img_name,
  168. height=height,
  169. width=width,
  170. frame_id=frame_id,
  171. mot_frame_id=mot_frame_id)
  172. if parse_gt:
  173. gts = img2gts[mot_frame_id]
  174. for gt in gts:
  175. gt.update(id=ann_id, image_id=img_id)
  176. mot_ins_id = gt['mot_instance_id']
  177. if mot_ins_id in ins_maps:
  178. gt['instance_id'] = ins_maps[mot_ins_id]
  179. else:
  180. gt['instance_id'] = ins_id
  181. ins_maps[mot_ins_id] = ins_id
  182. ins_id += 1
  183. outputs['annotations'].append(gt)
  184. ann_id += 1
  185. if args.convert_det:
  186. dets = np.array(img2dets[mot_frame_id])
  187. if dets.ndim == 1:
  188. assert len(dets) == 0
  189. dets = np.zeros((0, 5))
  190. detections['det_bboxes'][img_name] = [dets]
  191. outputs['images'].append(image)
  192. img_id += 1
  193. outputs['videos'].append(video)
  194. vid_id += 1
  195. outputs['num_instances'] = ins_id
  196. print(f'{subset} has {ins_id} instances.')
  197. mmengine.dump(outputs, out_file)
  198. if args.convert_det:
  199. mmengine.dump(detections, det_file)
  200. print(f'Done! Saved as {out_file} and {det_file}')
  201. else:
  202. print(f'Done! Saved as {out_file}')
  203. if __name__ == '__main__':
  204. main()