소스 검색

Add labelme2coco.py for instance_segmentation

Close https://github.com/wkentaro/labelme/issues/34
Kentaro Wada 6 년 전
부모
커밋
f4dfc47a55

+ 1 - 0
README.md

@@ -31,6 +31,7 @@ It is written in Python and uses Qt for its graphical interface.
 - [x] Video annotation. ([video annotation](examples/video_annotation))
 - [x] GUI customization (predefined labels / flags, auto-saving, label validation, etc.). ([#144](https://github.com/wkentaro/labelme/pull/144))
 - [x] Exporting VOC-like dataset for semantic/instance segmentation. ([semantic segmentation](examples/semantic_segmentation), [instance segmentation](examples/instance_segmentation))
+- [x] Exporting COCO-like dataset for instance segmentation. ([instance segmentation](examples/instance_segmentation))
 
 
 

+ 10 - 0
examples/instance_segmentation/README.md

@@ -34,3 +34,13 @@ labelme_draw_label_png data_dataset_voc/SegmentationObjectPNG/2011_000003.png  #
 ```
 
 <img src=".readme/draw_label_png_class.jpg" width="33%" /> <img src=".readme/draw_label_png_object.jpg" width="33%" />
+
+
+## Convert to COCO-like Dataset
+
+```bash
+# It generates:
+#   - data_dataset_coco/JPEGImages
+#   - data_dataset_coco/annotations.json
+./labelme2coco.py data_annotated data_dataset_coco --labels labels.txt
+```

BIN
examples/instance_segmentation/data_dataset_coco/JPEGImages/2011_000003.jpg


BIN
examples/instance_segmentation/data_dataset_coco/JPEGImages/2011_000006.jpg


BIN
examples/instance_segmentation/data_dataset_coco/JPEGImages/2011_000025.jpg


파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
examples/instance_segmentation/data_dataset_coco/annotations.json


+ 146 - 0
examples/instance_segmentation/labelme2coco.py

@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+
+import argparse
+import datetime
+import glob
+import json
+import os
+import os.path as osp
+import sys
+
+import numpy as np
+import PIL.Image
+
+import labelme
+
+try:
+    import pycocotools.mask
+except ImportError:
+    print('Please install pycocotools:\n\n    pip install pycocotools\n')
+    sys.exit(1)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument('input_dir', help='input annotated directory')
+    parser.add_argument('output_dir', help='output dataset directory')
+    parser.add_argument('--labels', help='labels file')
+    args = parser.parse_args()
+
+    if osp.exists(args.output_dir):
+        print('Output directory already exists:', args.output_dir)
+        sys.exit(1)
+    os.makedirs(args.output_dir)
+    os.makedirs(osp.join(args.output_dir, 'JPEGImages'))
+    print('Creating dataset:', args.output_dir)
+
+    now = datetime.datetime.now()
+
+    data = dict(
+        info=dict(
+            description=None,
+            url=None,
+            version=None,
+            year=now.year,
+            contributor=None,
+            date_created=now.strftime('%Y-%m-%d %H:%M:%S.%f'),
+        ),
+        licenses=[dict(
+            url=None,
+            id=0,
+            name=None,
+        )],
+        images=[
+            # license, url, file_name, height, width, date_captured, id
+        ],
+        type='instances',
+        annotations=[
+            # segmentation, area, iscrowd, image_id, bbox, category_id, id
+        ],
+        categories=[
+            # supercategory, id, name
+        ],
+    )
+
+    class_name_to_id = {}
+    for i, line in enumerate(open(args.labels).readlines()):
+        class_id = i - 1  # starts with -1
+        class_name = line.strip()
+        if class_id == -1:
+            assert class_name == '__ignore__'
+            continue
+        elif class_id == 0:
+            assert class_name == '_background_'
+        class_name_to_id[class_name] = class_id
+        data['categories'].append(dict(
+            supercategory=None,
+            id=class_id,
+            name=class_name,
+        ))
+
+    out_ann_file = osp.join(args.output_dir, 'annotations.json')
+    label_files = glob.glob(osp.join(args.input_dir, '*.json'))
+    for image_id, label_file in enumerate(label_files):
+        print('Generating dataset from:', label_file)
+        with open(label_file) as f:
+            label_data = json.load(f)
+
+        base = osp.splitext(osp.basename(label_file))[0]
+        out_img_file = osp.join(
+            args.output_dir, 'JPEGImages', base + '.jpg'
+        )
+
+        img_file = osp.join(
+            osp.dirname(label_file), label_data['imagePath']
+        )
+        img = np.asarray(PIL.Image.open(img_file))
+        PIL.Image.fromarray(img).save(out_img_file)
+        data['images'].append(dict(
+            license=0,
+            url=None,
+            file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
+            height=img.shape[0],
+            width=img.shape[1],
+            date_captured=None,
+            id=image_id,
+        ))
+
+        masks = {}
+        for shape in label_data['shapes']:
+            points = shape['points']
+            label = shape['label']
+            shape_type = shape.get('shape_type', None)
+            mask = labelme.utils.shape_to_mask(
+                img.shape[:2], points, shape_type
+            )
+
+            mask = np.asfortranarray(mask.astype(np.uint8))
+            if label in masks:
+                masks[label] = masks[label] | mask
+            else:
+                masks[label] = mask
+
+        for label, mask in masks.items():
+            cls_name = label.split('-')[0]
+            if cls_name not in class_name_to_id:
+                continue
+            cls_id = class_name_to_id[cls_name]
+            segmentation = pycocotools.mask.encode(mask)
+            segmentation['counts'] = segmentation['counts'].decode()
+            area = float(pycocotools.mask.area(segmentation))
+            data['annotations'].append(dict(
+                segmentation=segmentation,
+                area=area,
+                iscrowd=None,
+                image_id=image_id,
+                category_id=cls_id,
+            ))
+
+    with open(out_ann_file, 'w') as f:
+        json.dump(data, f)
+
+
+# Run the converter only when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.