# json_to_dataset.py (2.6 KB)

  1. import argparse
  2. import base64
  3. import json
  4. import os
  5. import os.path as osp
  6. import warnings
  7. import PIL.Image
  8. import yaml
  9. from labelme import utils
  10. def main():
  11. warnings.warn("This script is aimed to demonstrate how to convert the\n"
  12. "JSON file to a single image dataset, and not to handle\n"
  13. "multiple JSON files to generate a real-use dataset.")
  14. parser = argparse.ArgumentParser()
  15. parser.add_argument('json_file')
  16. parser.add_argument('-o', '--out', default=None)
  17. args = parser.parse_args()
  18. json_file = args.json_file
  19. if args.out is None:
  20. out_dir = osp.basename(json_file).replace('.', '_')
  21. out_dir = osp.join(osp.dirname(json_file), out_dir)
  22. else:
  23. out_dir = args.out
  24. if not osp.exists(out_dir):
  25. os.mkdir(out_dir)
  26. data = json.load(open(json_file))
  27. if data['imageData']:
  28. imageData = data['imageData']
  29. else:
  30. imagePath = os.path.join(os.path.dirname(json_file), data['imagePath'])
  31. with open(imagePath, 'rb') as f:
  32. imageData = f.read()
  33. imageData = base64.b64encode(imageData).decode('utf-8')
  34. img = utils.img_b64_to_arr(imageData)
  35. label_name_to_value = {'_background_': 0}
  36. for shape in data['shapes']:
  37. label_name = shape['label']
  38. if label_name in label_name_to_value:
  39. label_value = label_name_to_value[label_name]
  40. else:
  41. label_value = len(label_name_to_value)
  42. label_name_to_value[label_name] = label_value
  43. # label_values must be dense
  44. label_values, label_names = [], []
  45. for ln, lv in sorted(label_name_to_value.items(), key=lambda x: x[1]):
  46. label_values.append(lv)
  47. label_names.append(ln)
  48. assert label_values == list(range(len(label_values)))
  49. lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
  50. captions = ['{}: {}'.format(lv, ln)
  51. for ln, lv in label_name_to_value.items()]
  52. lbl_viz = utils.draw_label(lbl, img, captions)
  53. PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))
  54. utils.lblsave(osp.join(out_dir, 'label.png'), lbl)
  55. PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))
  56. with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
  57. for lbl_name in label_names:
  58. f.write(lbl_name + '\n')
  59. warnings.warn('info.yaml is being replaced by label_names.txt')
  60. info = dict(label_names=label_names)
  61. with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
  62. yaml.safe_dump(info, f, default_flow_style=False)
  63. print('Saved to: %s' % out_dir)
  64. if __name__ == '__main__':
  65. main()