geometric.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. from typing import Optional, Union
  3. import cv2
  4. import mmcv
  5. import numpy as np
  6. from mmcv.transforms import BaseTransform
  7. from mmcv.transforms.utils import cache_randomness
  8. from mmdet.registry import TRANSFORMS
  9. from mmdet.structures.bbox import autocast_box_type
  10. from .augment_wrappers import _MAX_LEVEL, level_to_mag
  11. @TRANSFORMS.register_module()
  12. class GeomTransform(BaseTransform):
  13. """Base class for geometric transformations. All geometric transformations
  14. need to inherit from this base class. ``GeomTransform`` unifies the class
  15. attributes and class functions of geometric transformations (ShearX,
  16. ShearY, Rotate, TranslateX, and TranslateY), and records the homography
  17. matrix.
  18. Required Keys:
  19. - img
  20. - gt_bboxes (BaseBoxes[torch.float32]) (optional)
  21. - gt_masks (BitmapMasks | PolygonMasks) (optional)
  22. - gt_seg_map (np.uint8) (optional)
  23. Modified Keys:
  24. - img
  25. - gt_bboxes
  26. - gt_masks
  27. - gt_seg_map
  28. Added Keys:
  29. - homography_matrix
  30. Args:
  31. prob (float): The probability for performing the geometric
  32. transformation and should be in range [0, 1]. Defaults to 1.0.
  33. level (int, optional): The level should be in range [0, _MAX_LEVEL].
  34. If level is None, it will generate from [0, _MAX_LEVEL] randomly.
  35. Defaults to None.
  36. min_mag (float): The minimum magnitude for geometric transformation.
  37. Defaults to 0.0.
  38. max_mag (float): The maximum magnitude for geometric transformation.
  39. Defaults to 1.0.
  40. reversal_prob (float): The probability that reverses the geometric
  41. transformation magnitude. Should be in range [0,1].
  42. Defaults to 0.5.
  43. img_border_value (int | float | tuple): The filled values for
  44. image border. If float, the same fill value will be used for
  45. all the three channels of image. If tuple, it should be 3 elements.
  46. Defaults to 128.
  47. mask_border_value (int): The fill value used for masks. Defaults to 0.
  48. seg_ignore_label (int): The fill value used for segmentation map.
  49. Note this value must equals ``ignore_label`` in ``semantic_head``
  50. of the corresponding config. Defaults to 255.
  51. interpolation (str): Interpolation method, accepted values are
  52. "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
  53. backend, "nearest", "bilinear" for 'pillow' backend. Defaults
  54. to 'bilinear'.
  55. """
  56. def __init__(self,
  57. prob: float = 1.0,
  58. level: Optional[int] = None,
  59. min_mag: float = 0.0,
  60. max_mag: float = 1.0,
  61. reversal_prob: float = 0.5,
  62. img_border_value: Union[int, float, tuple] = 128,
  63. mask_border_value: int = 0,
  64. seg_ignore_label: int = 255,
  65. interpolation: str = 'bilinear') -> None:
  66. assert 0 <= prob <= 1.0, f'The probability of the transformation ' \
  67. f'should be in range [0,1], got {prob}.'
  68. assert level is None or isinstance(level, int), \
  69. f'The level should be None or type int, got {type(level)}.'
  70. assert level is None or 0 <= level <= _MAX_LEVEL, \
  71. f'The level should be in range [0,{_MAX_LEVEL}], got {level}.'
  72. assert isinstance(min_mag, float), \
  73. f'min_mag should be type float, got {type(min_mag)}.'
  74. assert isinstance(max_mag, float), \
  75. f'max_mag should be type float, got {type(max_mag)}.'
  76. assert min_mag <= max_mag, \
  77. f'min_mag should smaller than max_mag, ' \
  78. f'got min_mag={min_mag} and max_mag={max_mag}'
  79. assert isinstance(reversal_prob, float), \
  80. f'reversal_prob should be type float, got {type(max_mag)}.'
  81. assert 0 <= reversal_prob <= 1.0, \
  82. f'The reversal probability of the transformation magnitude ' \
  83. f'should be type float, got {type(reversal_prob)}.'
  84. if isinstance(img_border_value, (float, int)):
  85. img_border_value = tuple([float(img_border_value)] * 3)
  86. elif isinstance(img_border_value, tuple):
  87. assert len(img_border_value) == 3, \
  88. f'img_border_value as tuple must have 3 elements, ' \
  89. f'got {len(img_border_value)}.'
  90. img_border_value = tuple([float(val) for val in img_border_value])
  91. else:
  92. raise ValueError(
  93. 'img_border_value must be float or tuple with 3 elements.')
  94. assert np.all([0 <= val <= 255 for val in img_border_value]), 'all ' \
  95. 'elements of img_border_value should between range [0,255].' \
  96. f'got {img_border_value}.'
  97. self.prob = prob
  98. self.level = level
  99. self.min_mag = min_mag
  100. self.max_mag = max_mag
  101. self.reversal_prob = reversal_prob
  102. self.img_border_value = img_border_value
  103. self.mask_border_value = mask_border_value
  104. self.seg_ignore_label = seg_ignore_label
  105. self.interpolation = interpolation
  106. def _transform_img(self, results: dict, mag: float) -> None:
  107. """Transform the image."""
  108. pass
  109. def _transform_masks(self, results: dict, mag: float) -> None:
  110. """Transform the masks."""
  111. pass
  112. def _transform_seg(self, results: dict, mag: float) -> None:
  113. """Transform the segmentation map."""
  114. pass
  115. def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
  116. """Get the homography matrix for the geometric transformation."""
  117. return np.eye(3, dtype=np.float32)
  118. def _transform_bboxes(self, results: dict, mag: float) -> None:
  119. """Transform the bboxes."""
  120. results['gt_bboxes'].project_(self.homography_matrix)
  121. results['gt_bboxes'].clip_(results['img_shape'])
  122. def _record_homography_matrix(self, results: dict) -> None:
  123. """Record the homography matrix for the geometric transformation."""
  124. if results.get('homography_matrix', None) is None:
  125. results['homography_matrix'] = self.homography_matrix
  126. else:
  127. results['homography_matrix'] = self.homography_matrix @ results[
  128. 'homography_matrix']
  129. @cache_randomness
  130. def _random_disable(self):
  131. """Randomly disable the transform."""
  132. return np.random.rand() > self.prob
  133. @cache_randomness
  134. def _get_mag(self):
  135. """Get the magnitude of the transform."""
  136. mag = level_to_mag(self.level, self.min_mag, self.max_mag)
  137. return -mag if np.random.rand() > self.reversal_prob else mag
  138. @autocast_box_type()
  139. def transform(self, results: dict) -> dict:
  140. """Transform function for images, bounding boxes, masks and semantic
  141. segmentation map.
  142. Args:
  143. results (dict): Result dict from loading pipeline.
  144. Returns:
  145. dict: Transformed results.
  146. """
  147. if self._random_disable():
  148. return results
  149. mag = self._get_mag()
  150. self.homography_matrix = self._get_homography_matrix(results, mag)
  151. self._record_homography_matrix(results)
  152. self._transform_img(results, mag)
  153. if results.get('gt_bboxes', None) is not None:
  154. self._transform_bboxes(results, mag)
  155. if results.get('gt_masks', None) is not None:
  156. self._transform_masks(results, mag)
  157. if results.get('gt_seg_map', None) is not None:
  158. self._transform_seg(results, mag)
  159. return results
  160. def __repr__(self) -> str:
  161. repr_str = self.__class__.__name__
  162. repr_str += f'(prob={self.prob}, '
  163. repr_str += f'level={self.level}, '
  164. repr_str += f'min_mag={self.min_mag}, '
  165. repr_str += f'max_mag={self.max_mag}, '
  166. repr_str += f'reversal_prob={self.reversal_prob}, '
  167. repr_str += f'img_border_value={self.img_border_value}, '
  168. repr_str += f'mask_border_value={self.mask_border_value}, '
  169. repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
  170. repr_str += f'interpolation={self.interpolation})'
  171. return repr_str
  172. @TRANSFORMS.register_module()
  173. class ShearX(GeomTransform):
  174. """Shear the images, bboxes, masks and segmentation map horizontally.
  175. Required Keys:
  176. - img
  177. - gt_bboxes (BaseBoxes[torch.float32]) (optional)
  178. - gt_masks (BitmapMasks | PolygonMasks) (optional)
  179. - gt_seg_map (np.uint8) (optional)
  180. Modified Keys:
  181. - img
  182. - gt_bboxes
  183. - gt_masks
  184. - gt_seg_map
  185. Added Keys:
  186. - homography_matrix
  187. Args:
  188. prob (float): The probability for performing Shear and should be in
  189. range [0, 1]. Defaults to 1.0.
  190. level (int, optional): The level should be in range [0, _MAX_LEVEL].
  191. If level is None, it will generate from [0, _MAX_LEVEL] randomly.
  192. Defaults to None.
  193. min_mag (float): The minimum angle for the horizontal shear.
  194. Defaults to 0.0.
  195. max_mag (float): The maximum angle for the horizontal shear.
  196. Defaults to 30.0.
  197. reversal_prob (float): The probability that reverses the horizontal
  198. shear magnitude. Should be in range [0,1]. Defaults to 0.5.
  199. img_border_value (int | float | tuple): The filled values for
  200. image border. If float, the same fill value will be used for
  201. all the three channels of image. If tuple, it should be 3 elements.
  202. Defaults to 128.
  203. mask_border_value (int): The fill value used for masks. Defaults to 0.
  204. seg_ignore_label (int): The fill value used for segmentation map.
  205. Note this value must equals ``ignore_label`` in ``semantic_head``
  206. of the corresponding config. Defaults to 255.
  207. interpolation (str): Interpolation method, accepted values are
  208. "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
  209. backend, "nearest", "bilinear" for 'pillow' backend. Defaults
  210. to 'bilinear'.
  211. """
  212. def __init__(self,
  213. prob: float = 1.0,
  214. level: Optional[int] = None,
  215. min_mag: float = 0.0,
  216. max_mag: float = 30.0,
  217. reversal_prob: float = 0.5,
  218. img_border_value: Union[int, float, tuple] = 128,
  219. mask_border_value: int = 0,
  220. seg_ignore_label: int = 255,
  221. interpolation: str = 'bilinear') -> None:
  222. assert 0. <= min_mag <= 90., \
  223. f'min_mag angle for ShearX should be ' \
  224. f'in range [0, 90], got {min_mag}.'
  225. assert 0. <= max_mag <= 90., \
  226. f'max_mag angle for ShearX should be ' \
  227. f'in range [0, 90], got {max_mag}.'
  228. super().__init__(
  229. prob=prob,
  230. level=level,
  231. min_mag=min_mag,
  232. max_mag=max_mag,
  233. reversal_prob=reversal_prob,
  234. img_border_value=img_border_value,
  235. mask_border_value=mask_border_value,
  236. seg_ignore_label=seg_ignore_label,
  237. interpolation=interpolation)
  238. @cache_randomness
  239. def _get_mag(self):
  240. """Get the magnitude of the transform."""
  241. mag = level_to_mag(self.level, self.min_mag, self.max_mag)
  242. mag = np.tan(mag * np.pi / 180)
  243. return -mag if np.random.rand() > self.reversal_prob else mag
  244. def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
  245. """Get the homography matrix for ShearX."""
  246. return np.array([[1, mag, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
  247. def _transform_img(self, results: dict, mag: float) -> None:
  248. """Shear the image horizontally."""
  249. results['img'] = mmcv.imshear(
  250. results['img'],
  251. mag,
  252. direction='horizontal',
  253. border_value=self.img_border_value,
  254. interpolation=self.interpolation)
  255. def _transform_masks(self, results: dict, mag: float) -> None:
  256. """Shear the masks horizontally."""
  257. results['gt_masks'] = results['gt_masks'].shear(
  258. results['img_shape'],
  259. mag,
  260. direction='horizontal',
  261. border_value=self.mask_border_value,
  262. interpolation=self.interpolation)
  263. def _transform_seg(self, results: dict, mag: float) -> None:
  264. """Shear the segmentation map horizontally."""
  265. results['gt_seg_map'] = mmcv.imshear(
  266. results['gt_seg_map'],
  267. mag,
  268. direction='horizontal',
  269. border_value=self.seg_ignore_label,
  270. interpolation='nearest')
  271. @TRANSFORMS.register_module()
  272. class ShearY(GeomTransform):
  273. """Shear the images, bboxes, masks and segmentation map vertically.
  274. Required Keys:
  275. - img
  276. - gt_bboxes (BaseBoxes[torch.float32]) (optional)
  277. - gt_masks (BitmapMasks | PolygonMasks) (optional)
  278. - gt_seg_map (np.uint8) (optional)
  279. Modified Keys:
  280. - img
  281. - gt_bboxes
  282. - gt_masks
  283. - gt_seg_map
  284. Added Keys:
  285. - homography_matrix
  286. Args:
  287. prob (float): The probability for performing ShearY and should be in
  288. range [0, 1]. Defaults to 1.0.
  289. level (int, optional): The level should be in range [0,_MAX_LEVEL].
  290. If level is None, it will generate from [0, _MAX_LEVEL] randomly.
  291. Defaults to None.
  292. min_mag (float): The minimum angle for the vertical shear.
  293. Defaults to 0.0.
  294. max_mag (float): The maximum angle for the vertical shear.
  295. Defaults to 30.0.
  296. reversal_prob (float): The probability that reverses the vertical
  297. shear magnitude. Should be in range [0,1]. Defaults to 0.5.
  298. img_border_value (int | float | tuple): The filled values for
  299. image border. If float, the same fill value will be used for
  300. all the three channels of image. If tuple, it should be 3 elements.
  301. Defaults to 128.
  302. mask_border_value (int): The fill value used for masks. Defaults to 0.
  303. seg_ignore_label (int): The fill value used for segmentation map.
  304. Note this value must equals ``ignore_label`` in ``semantic_head``
  305. of the corresponding config. Defaults to 255.
  306. interpolation (str): Interpolation method, accepted values are
  307. "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
  308. backend, "nearest", "bilinear" for 'pillow' backend. Defaults
  309. to 'bilinear'.
  310. """
  311. def __init__(self,
  312. prob: float = 1.0,
  313. level: Optional[int] = None,
  314. min_mag: float = 0.0,
  315. max_mag: float = 30.,
  316. reversal_prob: float = 0.5,
  317. img_border_value: Union[int, float, tuple] = 128,
  318. mask_border_value: int = 0,
  319. seg_ignore_label: int = 255,
  320. interpolation: str = 'bilinear') -> None:
  321. assert 0. <= min_mag <= 90., \
  322. f'min_mag angle for ShearY should be ' \
  323. f'in range [0, 90], got {min_mag}.'
  324. assert 0. <= max_mag <= 90., \
  325. f'max_mag angle for ShearY should be ' \
  326. f'in range [0, 90], got {max_mag}.'
  327. super().__init__(
  328. prob=prob,
  329. level=level,
  330. min_mag=min_mag,
  331. max_mag=max_mag,
  332. reversal_prob=reversal_prob,
  333. img_border_value=img_border_value,
  334. mask_border_value=mask_border_value,
  335. seg_ignore_label=seg_ignore_label,
  336. interpolation=interpolation)
  337. @cache_randomness
  338. def _get_mag(self):
  339. """Get the magnitude of the transform."""
  340. mag = level_to_mag(self.level, self.min_mag, self.max_mag)
  341. mag = np.tan(mag * np.pi / 180)
  342. return -mag if np.random.rand() > self.reversal_prob else mag
  343. def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
  344. """Get the homography matrix for ShearY."""
  345. return np.array([[1, 0, 0], [mag, 1, 0], [0, 0, 1]], dtype=np.float32)
  346. def _transform_img(self, results: dict, mag: float) -> None:
  347. """Shear the image vertically."""
  348. results['img'] = mmcv.imshear(
  349. results['img'],
  350. mag,
  351. direction='vertical',
  352. border_value=self.img_border_value,
  353. interpolation=self.interpolation)
  354. def _transform_masks(self, results: dict, mag: float) -> None:
  355. """Shear the masks vertically."""
  356. results['gt_masks'] = results['gt_masks'].shear(
  357. results['img_shape'],
  358. mag,
  359. direction='vertical',
  360. border_value=self.mask_border_value,
  361. interpolation=self.interpolation)
  362. def _transform_seg(self, results: dict, mag: float) -> None:
  363. """Shear the segmentation map vertically."""
  364. results['gt_seg_map'] = mmcv.imshear(
  365. results['gt_seg_map'],
  366. mag,
  367. direction='vertical',
  368. border_value=self.seg_ignore_label,
  369. interpolation='nearest')
  370. @TRANSFORMS.register_module()
  371. class Rotate(GeomTransform):
  372. """Rotate the images, bboxes, masks and segmentation map.
  373. Required Keys:
  374. - img
  375. - gt_bboxes (BaseBoxes[torch.float32]) (optional)
  376. - gt_masks (BitmapMasks | PolygonMasks) (optional)
  377. - gt_seg_map (np.uint8) (optional)
  378. Modified Keys:
  379. - img
  380. - gt_bboxes
  381. - gt_masks
  382. - gt_seg_map
  383. Added Keys:
  384. - homography_matrix
  385. Args:
  386. prob (float): The probability for perform transformation and
  387. should be in range 0 to 1. Defaults to 1.0.
  388. level (int, optional): The level should be in range [0, _MAX_LEVEL].
  389. If level is None, it will generate from [0, _MAX_LEVEL] randomly.
  390. Defaults to None.
  391. min_mag (float): The maximum angle for rotation.
  392. Defaults to 0.0.
  393. max_mag (float): The maximum angle for rotation.
  394. Defaults to 30.0.
  395. reversal_prob (float): The probability that reverses the rotation
  396. magnitude. Should be in range [0,1]. Defaults to 0.5.
  397. img_border_value (int | float | tuple): The filled values for
  398. image border. If float, the same fill value will be used for
  399. all the three channels of image. If tuple, it should be 3 elements.
  400. Defaults to 128.
  401. mask_border_value (int): The fill value used for masks. Defaults to 0.
  402. seg_ignore_label (int): The fill value used for segmentation map.
  403. Note this value must equals ``ignore_label`` in ``semantic_head``
  404. of the corresponding config. Defaults to 255.
  405. interpolation (str): Interpolation method, accepted values are
  406. "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
  407. backend, "nearest", "bilinear" for 'pillow' backend. Defaults
  408. to 'bilinear'.
  409. """
  410. def __init__(self,
  411. prob: float = 1.0,
  412. level: Optional[int] = None,
  413. min_mag: float = 0.0,
  414. max_mag: float = 30.0,
  415. reversal_prob: float = 0.5,
  416. img_border_value: Union[int, float, tuple] = 128,
  417. mask_border_value: int = 0,
  418. seg_ignore_label: int = 255,
  419. interpolation: str = 'bilinear') -> None:
  420. assert 0. <= min_mag <= 180., \
  421. f'min_mag for Rotate should be in range [0,180], got {min_mag}.'
  422. assert 0. <= max_mag <= 180., \
  423. f'max_mag for Rotate should be in range [0,180], got {max_mag}.'
  424. super().__init__(
  425. prob=prob,
  426. level=level,
  427. min_mag=min_mag,
  428. max_mag=max_mag,
  429. reversal_prob=reversal_prob,
  430. img_border_value=img_border_value,
  431. mask_border_value=mask_border_value,
  432. seg_ignore_label=seg_ignore_label,
  433. interpolation=interpolation)
  434. def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
  435. """Get the homography matrix for Rotate."""
  436. img_shape = results['img_shape']
  437. center = ((img_shape[1] - 1) * 0.5, (img_shape[0] - 1) * 0.5)
  438. cv2_rotation_matrix = cv2.getRotationMatrix2D(center, -mag, 1.0)
  439. return np.concatenate(
  440. [cv2_rotation_matrix,
  441. np.array([0, 0, 1]).reshape((1, 3))]).astype(np.float32)
  442. def _transform_img(self, results: dict, mag: float) -> None:
  443. """Rotate the image."""
  444. results['img'] = mmcv.imrotate(
  445. results['img'],
  446. mag,
  447. border_value=self.img_border_value,
  448. interpolation=self.interpolation)
  449. def _transform_masks(self, results: dict, mag: float) -> None:
  450. """Rotate the masks."""
  451. results['gt_masks'] = results['gt_masks'].rotate(
  452. results['img_shape'],
  453. mag,
  454. border_value=self.mask_border_value,
  455. interpolation=self.interpolation)
  456. def _transform_seg(self, results: dict, mag: float) -> None:
  457. """Rotate the segmentation map."""
  458. results['gt_seg_map'] = mmcv.imrotate(
  459. results['gt_seg_map'],
  460. mag,
  461. border_value=self.seg_ignore_label,
  462. interpolation='nearest')
  463. @TRANSFORMS.register_module()
  464. class TranslateX(GeomTransform):
  465. """Translate the images, bboxes, masks and segmentation map horizontally.
  466. Required Keys:
  467. - img
  468. - gt_bboxes (BaseBoxes[torch.float32]) (optional)
  469. - gt_masks (BitmapMasks | PolygonMasks) (optional)
  470. - gt_seg_map (np.uint8) (optional)
  471. Modified Keys:
  472. - img
  473. - gt_bboxes
  474. - gt_masks
  475. - gt_seg_map
  476. Added Keys:
  477. - homography_matrix
  478. Args:
  479. prob (float): The probability for perform transformation and
  480. should be in range 0 to 1. Defaults to 1.0.
  481. level (int, optional): The level should be in range [0, _MAX_LEVEL].
  482. If level is None, it will generate from [0, _MAX_LEVEL] randomly.
  483. Defaults to None.
  484. min_mag (float): The minimum pixel's offset ratio for horizontal
  485. translation. Defaults to 0.0.
  486. max_mag (float): The maximum pixel's offset ratio for horizontal
  487. translation. Defaults to 0.1.
  488. reversal_prob (float): The probability that reverses the horizontal
  489. translation magnitude. Should be in range [0,1]. Defaults to 0.5.
  490. img_border_value (int | float | tuple): The filled values for
  491. image border. If float, the same fill value will be used for
  492. all the three channels of image. If tuple, it should be 3 elements.
  493. Defaults to 128.
  494. mask_border_value (int): The fill value used for masks. Defaults to 0.
  495. seg_ignore_label (int): The fill value used for segmentation map.
  496. Note this value must equals ``ignore_label`` in ``semantic_head``
  497. of the corresponding config. Defaults to 255.
  498. interpolation (str): Interpolation method, accepted values are
  499. "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
  500. backend, "nearest", "bilinear" for 'pillow' backend. Defaults
  501. to 'bilinear'.
  502. """
  503. def __init__(self,
  504. prob: float = 1.0,
  505. level: Optional[int] = None,
  506. min_mag: float = 0.0,
  507. max_mag: float = 0.1,
  508. reversal_prob: float = 0.5,
  509. img_border_value: Union[int, float, tuple] = 128,
  510. mask_border_value: int = 0,
  511. seg_ignore_label: int = 255,
  512. interpolation: str = 'bilinear') -> None:
  513. assert 0. <= min_mag <= 1., \
  514. f'min_mag ratio for TranslateX should be ' \
  515. f'in range [0, 1], got {min_mag}.'
  516. assert 0. <= max_mag <= 1., \
  517. f'max_mag ratio for TranslateX should be ' \
  518. f'in range [0, 1], got {max_mag}.'
  519. super().__init__(
  520. prob=prob,
  521. level=level,
  522. min_mag=min_mag,
  523. max_mag=max_mag,
  524. reversal_prob=reversal_prob,
  525. img_border_value=img_border_value,
  526. mask_border_value=mask_border_value,
  527. seg_ignore_label=seg_ignore_label,
  528. interpolation=interpolation)
  529. def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
  530. """Get the homography matrix for TranslateX."""
  531. mag = int(results['img_shape'][1] * mag)
  532. return np.array([[1, 0, mag], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
  533. def _transform_img(self, results: dict, mag: float) -> None:
  534. """Translate the image horizontally."""
  535. mag = int(results['img_shape'][1] * mag)
  536. results['img'] = mmcv.imtranslate(
  537. results['img'],
  538. mag,
  539. direction='horizontal',
  540. border_value=self.img_border_value,
  541. interpolation=self.interpolation)
  542. def _transform_masks(self, results: dict, mag: float) -> None:
  543. """Translate the masks horizontally."""
  544. mag = int(results['img_shape'][1] * mag)
  545. results['gt_masks'] = results['gt_masks'].translate(
  546. results['img_shape'],
  547. mag,
  548. direction='horizontal',
  549. border_value=self.mask_border_value,
  550. interpolation=self.interpolation)
  551. def _transform_seg(self, results: dict, mag: float) -> None:
  552. """Translate the segmentation map horizontally."""
  553. mag = int(results['img_shape'][1] * mag)
  554. results['gt_seg_map'] = mmcv.imtranslate(
  555. results['gt_seg_map'],
  556. mag,
  557. direction='horizontal',
  558. border_value=self.seg_ignore_label,
  559. interpolation='nearest')
  560. @TRANSFORMS.register_module()
  561. class TranslateY(GeomTransform):
  562. """Translate the images, bboxes, masks and segmentation map vertically.
  563. Required Keys:
  564. - img
  565. - gt_bboxes (BaseBoxes[torch.float32]) (optional)
  566. - gt_masks (BitmapMasks | PolygonMasks) (optional)
  567. - gt_seg_map (np.uint8) (optional)
  568. Modified Keys:
  569. - img
  570. - gt_bboxes
  571. - gt_masks
  572. - gt_seg_map
  573. Added Keys:
  574. - homography_matrix
  575. Args:
  576. prob (float): The probability for perform transformation and
  577. should be in range 0 to 1. Defaults to 1.0.
  578. level (int, optional): The level should be in range [0, _MAX_LEVEL].
  579. If level is None, it will generate from [0, _MAX_LEVEL] randomly.
  580. Defaults to None.
  581. min_mag (float): The minimum pixel's offset ratio for vertical
  582. translation. Defaults to 0.0.
  583. max_mag (float): The maximum pixel's offset ratio for vertical
  584. translation. Defaults to 0.1.
  585. reversal_prob (float): The probability that reverses the vertical
  586. translation magnitude. Should be in range [0,1]. Defaults to 0.5.
  587. img_border_value (int | float | tuple): The filled values for
  588. image border. If float, the same fill value will be used for
  589. all the three channels of image. If tuple, it should be 3 elements.
  590. Defaults to 128.
  591. mask_border_value (int): The fill value used for masks. Defaults to 0.
  592. seg_ignore_label (int): The fill value used for segmentation map.
  593. Note this value must equals ``ignore_label`` in ``semantic_head``
  594. of the corresponding config. Defaults to 255.
  595. interpolation (str): Interpolation method, accepted values are
  596. "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
  597. backend, "nearest", "bilinear" for 'pillow' backend. Defaults
  598. to 'bilinear'.
  599. """
  600. def __init__(self,
  601. prob: float = 1.0,
  602. level: Optional[int] = None,
  603. min_mag: float = 0.0,
  604. max_mag: float = 0.1,
  605. reversal_prob: float = 0.5,
  606. img_border_value: Union[int, float, tuple] = 128,
  607. mask_border_value: int = 0,
  608. seg_ignore_label: int = 255,
  609. interpolation: str = 'bilinear') -> None:
  610. assert 0. <= min_mag <= 1., \
  611. f'min_mag ratio for TranslateY should be ' \
  612. f'in range [0,1], got {min_mag}.'
  613. assert 0. <= max_mag <= 1., \
  614. f'max_mag ratio for TranslateY should be ' \
  615. f'in range [0,1], got {max_mag}.'
  616. super().__init__(
  617. prob=prob,
  618. level=level,
  619. min_mag=min_mag,
  620. max_mag=max_mag,
  621. reversal_prob=reversal_prob,
  622. img_border_value=img_border_value,
  623. mask_border_value=mask_border_value,
  624. seg_ignore_label=seg_ignore_label,
  625. interpolation=interpolation)
  626. def _get_homography_matrix(self, results: dict, mag: float) -> np.ndarray:
  627. """Get the homography matrix for TranslateY."""
  628. mag = int(results['img_shape'][0] * mag)
  629. return np.array([[1, 0, 0], [0, 1, mag], [0, 0, 1]], dtype=np.float32)
  630. def _transform_img(self, results: dict, mag: float) -> None:
  631. """Translate the image vertically."""
  632. mag = int(results['img_shape'][0] * mag)
  633. results['img'] = mmcv.imtranslate(
  634. results['img'],
  635. mag,
  636. direction='vertical',
  637. border_value=self.img_border_value,
  638. interpolation=self.interpolation)
  639. def _transform_masks(self, results: dict, mag: float) -> None:
  640. """Translate masks vertically."""
  641. mag = int(results['img_shape'][0] * mag)
  642. results['gt_masks'] = results['gt_masks'].translate(
  643. results['img_shape'],
  644. mag,
  645. direction='vertical',
  646. border_value=self.mask_border_value,
  647. interpolation=self.interpolation)
  648. def _transform_seg(self, results: dict, mag: float) -> None:
  649. """Translate segmentation map vertically."""
  650. mag = int(results['img_shape'][0] * mag)
  651. results['gt_seg_map'] = mmcv.imtranslate(
  652. results['gt_seg_map'],
  653. mag,
  654. direction='vertical',
  655. border_value=self.seg_ignore_label,
  656. interpolation='nearest')