delta_xywh_bbox_coder.py

# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Optional, Sequence, Union

import numpy as np
import torch
from torch import Tensor

from mmdet.registry import TASK_UTILS
from mmdet.structures.bbox import BaseBoxes, HorizontalBoxes, get_box_tensor
from .base_bbox_coder import BaseBBoxCoder


@TASK_UTILS.register_module()
class DeltaXYWHBBoxCoder(BaseBBoxCoder):
    """Delta XYWH BBox coder.

    Following the practice in `R-CNN <https://arxiv.org/abs/1311.2524>`_,
    this coder encodes bbox (x1, y1, x2, y2) into delta (dx, dy, dw, dh) and
    decodes delta (dx, dy, dw, dh) back to original bbox (x1, y1, x2, y2).

    Args:
        target_means (Sequence[float]): Denormalizing means of target for
            delta coordinates.
        target_stds (Sequence[float]): Denormalizing standard deviation of
            target for delta coordinates.
        clip_border (bool, optional): Whether to clip the objects outside the
            border of the image. Defaults to True.
        add_ctr_clamp (bool): Whether to add center clamp. When added, the
            predicted box is clamped if its center is too far away from
            the original anchor's center. Only used by YOLOF. Default False.
        ctr_clamp (int): The maximum pixel shift to clamp. Only used by
            YOLOF. Default 32.
    """

    def __init__(self,
                 target_means: Sequence[float] = (0., 0., 0., 0.),
                 target_stds: Sequence[float] = (1., 1., 1., 1.),
                 clip_border: bool = True,
                 add_ctr_clamp: bool = False,
                 ctr_clamp: int = 32,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.means = target_means
        self.stds = target_stds
        self.clip_border = clip_border
        self.add_ctr_clamp = add_ctr_clamp
        self.ctr_clamp = ctr_clamp

    def encode(self, bboxes: Union[Tensor, BaseBoxes],
               gt_bboxes: Union[Tensor, BaseBoxes]) -> Tensor:
        """Get box regression transformation deltas that can be used to
        transform the ``bboxes`` into the ``gt_bboxes``.

        Args:
            bboxes (torch.Tensor or :obj:`BaseBoxes`): Source boxes,
                e.g., object proposals.
            gt_bboxes (torch.Tensor or :obj:`BaseBoxes`): Target of the
                transformation, e.g., ground-truth boxes.

        Returns:
            torch.Tensor: Box transformation deltas.
        """
        bboxes = get_box_tensor(bboxes)
        gt_bboxes = get_box_tensor(gt_bboxes)
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
        encoded_bboxes = bbox2delta(bboxes, gt_bboxes, self.means, self.stds)
        return encoded_bboxes

    def decode(
        self,
        bboxes: Union[Tensor, BaseBoxes],
        pred_bboxes: Tensor,
        max_shape: Optional[Union[Sequence[int], Tensor,
                                  Sequence[Sequence[int]]]] = None,
        wh_ratio_clip: Optional[float] = 16 / 1000
    ) -> Union[Tensor, BaseBoxes]:
        """Apply transformation ``pred_bboxes`` to ``bboxes``.

        Args:
            bboxes (torch.Tensor or :obj:`BaseBoxes`): Basic boxes. Shape
                (B, N, 4) or (N, 4).
            pred_bboxes (Tensor): Encoded offsets with respect to each roi.
                Has shape (B, N, num_classes * 4) or (B, N, 4) or
                (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H
                when rois is a grid of anchors. Offset encoding follows [1]_.
            max_shape (Sequence[int] or torch.Tensor or Sequence[
                Sequence[int]], optional): Maximum bounds for boxes,
                specifies (H, W, C) or (H, W). If bboxes shape is (B, N, 4),
                then the max_shape should be a Sequence[Sequence[int]]
                and the length of max_shape should also be B.
            wh_ratio_clip (float, optional): The allowed ratio between
                width and height.

        Returns:
            Union[torch.Tensor, :obj:`BaseBoxes`]: Decoded boxes.
        """
        bboxes = get_box_tensor(bboxes)
        assert pred_bboxes.size(0) == bboxes.size(0)
        if pred_bboxes.ndim == 3:
            assert pred_bboxes.size(1) == bboxes.size(1)

        if pred_bboxes.ndim == 2 and not torch.onnx.is_in_onnx_export():
            # single image decode
            decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means,
                                        self.stds, max_shape, wh_ratio_clip,
                                        self.clip_border, self.add_ctr_clamp,
                                        self.ctr_clamp)
        else:
            if pred_bboxes.ndim == 3 and not torch.onnx.is_in_onnx_export():
                warnings.warn(
                    'DeprecationWarning: onnx_delta2bbox is deprecated '
                    'in the case of batch decoding and non-ONNX, '
                    'please use "delta2bbox" instead. In order to improve '
                    'the decoding speed, the batch function will no '
                    'longer be supported. ')
            decoded_bboxes = onnx_delta2bbox(bboxes, pred_bboxes, self.means,
                                             self.stds, max_shape,
                                             wh_ratio_clip, self.clip_border,
                                             self.add_ctr_clamp,
                                             self.ctr_clamp)

        if self.use_box_type:
            assert decoded_bboxes.size(-1) == 4, \
                ('Cannot wrap decoded boxes with box type when decoded '
                 'boxes have shape of (N, num_classes * 4)')
            decoded_bboxes = HorizontalBoxes(decoded_bboxes)
        return decoded_bboxes
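

# A minimal usage sketch of ``DeltaXYWHBBoxCoder`` (illustrative only; the
# helper name ``_demo_coder_roundtrip``, the box values, and the stds are
# made up, and in practice the coder is usually built from a config via
# TASK_UTILS rather than instantiated directly).
def _demo_coder_roundtrip() -> None:
    coder = DeltaXYWHBBoxCoder(
        target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2))
    # Build valid (x1, y1, x2, y2) proposals with positive width/height.
    xy = torch.rand(8, 2) * 16
    wh = torch.rand(8, 2) * 16 + 1
    proposals = torch.cat([xy, xy + wh], dim=-1)
    gt_bboxes = proposals + 1.  # shifted ground truth, same width/height
    deltas = coder.encode(proposals, gt_bboxes)
    restored = coder.decode(proposals, deltas, max_shape=(64, 64))
    # decode(encode(x)) recovers the targets up to clipping and fp error.
    assert torch.allclose(get_box_tensor(restored), gt_bboxes, atol=1e-4)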


@TASK_UTILS.register_module()
class DeltaXYWHBBoxCoderForGLIP(DeltaXYWHBBoxCoder):
    """This coder is designed specifically for the GLIP algorithm.

    In order to completely match the official performance, we need to
    perform special calculations in the encoding and decoding processes,
    such as additional +1 and -1 calculations. However, this is not a
    user-friendly design.
    """

    def encode(self, bboxes: Union[Tensor, BaseBoxes],
               gt_bboxes: Union[Tensor, BaseBoxes]) -> Tensor:
        """Get box regression transformation deltas that can be used to
        transform the ``bboxes`` into the ``gt_bboxes``.

        Args:
            bboxes (torch.Tensor or :obj:`BaseBoxes`): Source boxes,
                e.g., object proposals.
            gt_bboxes (torch.Tensor or :obj:`BaseBoxes`): Target of the
                transformation, e.g., ground-truth boxes.

        Returns:
            torch.Tensor: Box transformation deltas.
        """
        bboxes = get_box_tensor(bboxes)
        gt_bboxes = get_box_tensor(gt_bboxes)
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4
        encoded_bboxes = bbox2delta(bboxes, gt_bboxes, self.means, self.stds)
        return encoded_bboxes

    def decode(
        self,
        bboxes: Union[Tensor, BaseBoxes],
        pred_bboxes: Tensor,
        max_shape: Optional[Union[Sequence[int], Tensor,
                                  Sequence[Sequence[int]]]] = None,
        wh_ratio_clip: Optional[float] = 16 / 1000
    ) -> Union[Tensor, BaseBoxes]:
        """Apply transformation ``pred_bboxes`` to ``bboxes``.

        Args:
            bboxes (torch.Tensor or :obj:`BaseBoxes`): Basic boxes. Shape
                (B, N, 4) or (N, 4).
            pred_bboxes (Tensor): Encoded offsets with respect to each roi.
                Has shape (B, N, num_classes * 4) or (B, N, 4) or
                (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H
                when rois is a grid of anchors. Offset encoding follows [1]_.
            max_shape (Sequence[int] or torch.Tensor or Sequence[
                Sequence[int]], optional): Maximum bounds for boxes,
                specifies (H, W, C) or (H, W). If bboxes shape is (B, N, 4),
                then the max_shape should be a Sequence[Sequence[int]]
                and the length of max_shape should also be B.
            wh_ratio_clip (float, optional): The allowed ratio between
                width and height.

        Returns:
            Union[torch.Tensor, :obj:`BaseBoxes`]: Decoded boxes.
        """
        bboxes = get_box_tensor(bboxes)
        assert pred_bboxes.size(0) == bboxes.size(0)
        if pred_bboxes.ndim == 3:
            assert pred_bboxes.size(1) == bboxes.size(1)

        if pred_bboxes.ndim == 2 and not torch.onnx.is_in_onnx_export():
            # single image decode
            decoded_bboxes = delta2bbox_glip(bboxes, pred_bboxes, self.means,
                                             self.stds, max_shape,
                                             wh_ratio_clip, self.clip_border,
                                             self.add_ctr_clamp,
                                             self.ctr_clamp)
        else:
            raise NotImplementedError()

        if self.use_box_type:
            assert decoded_bboxes.size(-1) == 4, \
                ('Cannot wrap decoded boxes with box type when decoded '
                 'boxes have shape of (N, num_classes * 4)')
            decoded_bboxes = HorizontalBoxes(decoded_bboxes)
        return decoded_bboxes


def bbox2delta(
    proposals: Tensor,
    gt: Tensor,
    means: Sequence[float] = (0., 0., 0., 0.),
    stds: Sequence[float] = (1., 1., 1., 1.)
) -> Tensor:
    """Compute deltas of proposals w.r.t. gt.

    We usually compute the deltas of x, y, w, h of proposals w.r.t ground
    truth bboxes to get regression target.
    This is the inverse function of :func:`delta2bbox`.

    Args:
        proposals (Tensor): Boxes to be transformed, shape (N, ..., 4).
        gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4).
        means (Sequence[float]): Denormalizing means for delta coordinates.
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates.

    Returns:
        Tensor: deltas with shape (N, ..., 4), where the last dimension
        represents dx, dy, dw, dh.
    """
    assert proposals.size() == gt.size()

    proposals = proposals.float()
    gt = gt.float()
    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
    pw = proposals[..., 2] - proposals[..., 0]
    ph = proposals[..., 3] - proposals[..., 1]

    gx = (gt[..., 0] + gt[..., 2]) * 0.5
    gy = (gt[..., 1] + gt[..., 3]) * 0.5
    gw = gt[..., 2] - gt[..., 0]
    gh = gt[..., 3] - gt[..., 1]

    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    deltas = torch.stack([dx, dy, dw, dh], dim=-1)

    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    deltas = deltas.sub_(means).div_(stds)

    return deltas
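

# Worked example for ``bbox2delta`` (a sketch; the values are chosen by
# hand and the helper name ``_demo_bbox2delta`` is made up): a proposal
# (0, 0, 4, 4) has center (2, 2) and size 4x4; a ground truth (1, 1, 5, 5)
# has center (3, 3) and the same size, so dx = (3 - 2) / 4 = 0.25,
# dy = 0.25, and dw = dh = log(4 / 4) = 0.
def _demo_bbox2delta() -> None:
    proposals = torch.tensor([[0., 0., 4., 4.]])
    gt = torch.tensor([[1., 1., 5., 5.]])
    deltas = bbox2delta(proposals, gt)
    assert torch.allclose(deltas, torch.tensor([[0.25, 0.25, 0., 0.]]))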


def delta2bbox(rois: Tensor,
               deltas: Tensor,
               means: Sequence[float] = (0., 0., 0., 0.),
               stds: Sequence[float] = (1., 1., 1., 1.),
               max_shape: Optional[Union[Sequence[int], Tensor,
                                         Sequence[Sequence[int]]]] = None,
               wh_ratio_clip: float = 16 / 1000,
               clip_border: bool = True,
               add_ctr_clamp: bool = False,
               ctr_clamp: int = 32) -> Tensor:
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the deltas
    are network outputs used to shift/scale those boxes.
    This is the inverse function of :func:`bbox2delta`.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (N, 4).
        deltas (Tensor): Encoded offsets relative to each roi.
            Has shape (N, num_classes * 4) or (N, 4). Note
            N = num_base_anchors * W * H, when rois is a grid of
            anchors. Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta coordinates.
            Default (0., 0., 0., 0.).
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates. Default (1., 1., 1., 1.).
        max_shape (tuple[int, int]): Maximum bounds for boxes, specifies
            (H, W). Default None.
        wh_ratio_clip (float): Maximum aspect ratio for boxes. Default
            16 / 1000.
        clip_border (bool, optional): Whether to clip the objects outside the
            border of the image. Default True.
        add_ctr_clamp (bool): Whether to add center clamp. When set to True,
            the center of the predicted bounding box will be clamped to
            avoid being too far away from the center of the anchor.
            Only used by YOLOF. Default False.
        ctr_clamp (int): The maximum pixel shift to clamp. Only used by
            YOLOF. Default 32.

    Returns:
        Tensor: Boxes with shape (N, num_classes * 4) or (N, 4), where 4
        represent tl_x, tl_y, br_x, br_y.

    References:
        .. [1] https://arxiv.org/abs/1311.2524

    Example:
        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 5.,  5.,  5.,  5.]])
        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],
        >>>                        [  1.,   1.,   1.,   1.],
        >>>                        [  0.,   0.,   2.,  -1.],
        >>>                        [ 0.7, -1.9, -0.5,  0.3]])
        >>> delta2bbox(rois, deltas, max_shape=(32, 32, 3))
        tensor([[0.0000, 0.0000, 1.0000, 1.0000],
                [0.1409, 0.1409, 2.8591, 2.8591],
                [0.0000, 0.3161, 4.1945, 0.6839],
                [5.0000, 5.0000, 5.0000, 5.0000]])
    """
    num_bboxes, num_classes = deltas.size(0), deltas.size(1) // 4
    if num_bboxes == 0:
        return deltas

    deltas = deltas.reshape(-1, 4)

    means = deltas.new_tensor(means).view(1, -1)
    stds = deltas.new_tensor(stds).view(1, -1)
    denorm_deltas = deltas * stds + means

    dxy = denorm_deltas[:, :2]
    dwh = denorm_deltas[:, 2:]

    # Compute width/height of each roi
    rois_ = rois.repeat(1, num_classes).reshape(-1, 4)
    pxy = ((rois_[:, :2] + rois_[:, 2:]) * 0.5)
    pwh = (rois_[:, 2:] - rois_[:, :2])

    dxy_wh = pwh * dxy

    max_ratio = np.abs(np.log(wh_ratio_clip))
    if add_ctr_clamp:
        dxy_wh = torch.clamp(dxy_wh, max=ctr_clamp, min=-ctr_clamp)
        dwh = torch.clamp(dwh, max=max_ratio)
    else:
        dwh = dwh.clamp(min=-max_ratio, max=max_ratio)

    gxy = pxy + dxy_wh
    gwh = pwh * dwh.exp()
    x1y1 = gxy - (gwh * 0.5)
    x2y2 = gxy + (gwh * 0.5)
    bboxes = torch.cat([x1y1, x2y2], dim=-1)
    if clip_border and max_shape is not None:
        bboxes[..., 0::2].clamp_(min=0, max=max_shape[1])
        bboxes[..., 1::2].clamp_(min=0, max=max_shape[0])
    bboxes = bboxes.reshape(num_bboxes, -1)
    return bboxes
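

# Sanity-check sketch: ``delta2bbox`` inverts ``bbox2delta`` (up to the
# aspect-ratio clamp and border clipping). The helper name
# ``_demo_delta_roundtrip`` and the box values are illustrative only.
def _demo_delta_roundtrip() -> None:
    rois = torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 25.]])
    gt = torch.tensor([[2., 2., 12., 14.], [4., 4., 22., 26.]])
    deltas = bbox2delta(rois, gt)
    restored = delta2bbox(rois, deltas)
    assert torch.allclose(restored, gt, atol=1e-4)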


def onnx_delta2bbox(rois: Tensor,
                    deltas: Tensor,
                    means: Sequence[float] = (0., 0., 0., 0.),
                    stds: Sequence[float] = (1., 1., 1., 1.),
                    max_shape: Optional[Union[Sequence[int], Tensor,
                                              Sequence[Sequence[int]]]] = None,
                    wh_ratio_clip: float = 16 / 1000,
                    clip_border: Optional[bool] = True,
                    add_ctr_clamp: bool = False,
                    ctr_clamp: int = 32) -> Tensor:
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the deltas
    are network outputs used to shift/scale those boxes.
    This is the inverse function of :func:`bbox2delta`.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (N, 4) or
            (B, N, 4).
        deltas (Tensor): Encoded offsets with respect to each roi.
            Has shape (B, N, num_classes * 4) or (B, N, 4) or
            (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H
            when rois is a grid of anchors. Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta coordinates.
            Default (0., 0., 0., 0.).
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates. Default (1., 1., 1., 1.).
        max_shape (Sequence[int] or torch.Tensor or Sequence[
            Sequence[int]], optional): Maximum bounds for boxes, specifies
            (H, W, C) or (H, W). If rois shape is (B, N, 4), then
            the max_shape should be a Sequence[Sequence[int]]
            and the length of max_shape should also be B. Default None.
        wh_ratio_clip (float): Maximum aspect ratio for boxes.
            Default 16 / 1000.
        clip_border (bool, optional): Whether to clip the objects outside the
            border of the image. Default True.
        add_ctr_clamp (bool): Whether to add center clamp. When added, the
            predicted box is clamped if its center is too far away from
            the original anchor's center. Only used by YOLOF. Default False.
        ctr_clamp (int): The maximum pixel shift to clamp. Only used by
            YOLOF. Default 32.

    Returns:
        Tensor: Boxes with shape (B, N, num_classes * 4) or (B, N, 4) or
        (N, num_classes * 4) or (N, 4), where 4 represent
        tl_x, tl_y, br_x, br_y.

    References:
        .. [1] https://arxiv.org/abs/1311.2524

    Example:
        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 5.,  5.,  5.,  5.]])
        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],
        >>>                        [  1.,   1.,   1.,   1.],
        >>>                        [  0.,   0.,   2.,  -1.],
        >>>                        [ 0.7, -1.9, -0.5,  0.3]])
        >>> onnx_delta2bbox(rois, deltas, max_shape=(32, 32, 3))
        tensor([[0.0000, 0.0000, 1.0000, 1.0000],
                [0.1409, 0.1409, 2.8591, 2.8591],
                [0.0000, 0.3161, 4.1945, 0.6839],
                [5.0000, 5.0000, 5.0000, 5.0000]])
    """
    means = deltas.new_tensor(means).view(1, -1).repeat(
        1, deltas.size(-1) // 4)
    stds = deltas.new_tensor(stds).view(1, -1).repeat(
        1, deltas.size(-1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[..., 0::4]
    dy = denorm_deltas[..., 1::4]
    dw = denorm_deltas[..., 2::4]
    dh = denorm_deltas[..., 3::4]

    x1, y1 = rois[..., 0], rois[..., 1]
    x2, y2 = rois[..., 2], rois[..., 3]
    # Compute center of each roi
    px = ((x1 + x2) * 0.5).unsqueeze(-1).expand_as(dx)
    py = ((y1 + y2) * 0.5).unsqueeze(-1).expand_as(dy)
    # Compute width/height of each roi
    pw = (x2 - x1).unsqueeze(-1).expand_as(dw)
    ph = (y2 - y1).unsqueeze(-1).expand_as(dh)

    dx_width = pw * dx
    dy_height = ph * dy

    max_ratio = np.abs(np.log(wh_ratio_clip))
    if add_ctr_clamp:
        dx_width = torch.clamp(dx_width, max=ctr_clamp, min=-ctr_clamp)
        dy_height = torch.clamp(dy_height, max=ctr_clamp, min=-ctr_clamp)
        dw = torch.clamp(dw, max=max_ratio)
        dh = torch.clamp(dh, max=max_ratio)
    else:
        dw = dw.clamp(min=-max_ratio, max=max_ratio)
        dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Use network energy to shift the center of each roi
    gx = px + dx_width
    gy = py + dy_height
    # Convert center-xy/width/height to top-left, bottom-right
    x1 = gx - gw * 0.5
    y1 = gy - gh * 0.5
    x2 = gx + gw * 0.5
    y2 = gy + gh * 0.5

    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size())

    if clip_border and max_shape is not None:
        # clip bboxes with dynamic `min` and `max` for onnx
        if torch.onnx.is_in_onnx_export():
            from mmdet.core.export import dynamic_clip_for_onnx
            x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape)
            bboxes = torch.stack([x1, y1, x2, y2],
                                 dim=-1).view(deltas.size())
            return bboxes
        if not isinstance(max_shape, torch.Tensor):
            max_shape = x1.new_tensor(max_shape)
        max_shape = max_shape[..., :2].type_as(x1)
        if max_shape.ndim == 2:
            assert bboxes.ndim == 3
            assert max_shape.size(0) == bboxes.size(0)

        min_xy = x1.new_tensor(0)
        max_xy = torch.cat(
            [max_shape] * (deltas.size(-1) // 2),
            dim=-1).flip(-1).unsqueeze(-2)
        bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)
        bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)

    return bboxes
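

# Sketch of batched decoding with per-image bounds (illustrative only; the
# helper name ``_demo_batched_decode`` and all values are made up). Each
# image in the batch gets its own (H, W) entry in ``max_shape``.
def _demo_batched_decode() -> None:
    rois = torch.tensor([[[0., 0., 8., 8.], [2., 2., 6., 6.]],
                         [[0., 0., 4., 4.], [1., 1., 3., 3.]]])  # (B, N, 4)
    deltas = torch.zeros(2, 2, 4)
    decoded = onnx_delta2bbox(rois, deltas, max_shape=[(6, 6), (6, 6)])
    # With zero deltas the rois are returned unchanged, then clipped to
    # each image's (H, W); e.g. the first roi becomes (0, 0, 6, 6).
    assert decoded.shape == (2, 2, 4)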


def delta2bbox_glip(rois: Tensor,
                    deltas: Tensor,
                    means: Sequence[float] = (0., 0., 0., 0.),
                    stds: Sequence[float] = (1., 1., 1., 1.),
                    max_shape: Optional[Union[Sequence[int], Tensor,
                                              Sequence[Sequence[int]]]] = None,
                    wh_ratio_clip: float = 16 / 1000,
                    clip_border: bool = True,
                    add_ctr_clamp: bool = False,
                    ctr_clamp: int = 32) -> Tensor:
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the deltas
    are network outputs used to shift/scale those boxes.
    This is the inverse function of :func:`bbox2delta`.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (N, 4).
        deltas (Tensor): Encoded offsets relative to each roi.
            Has shape (N, num_classes * 4) or (N, 4). Note
            N = num_base_anchors * W * H, when rois is a grid of
            anchors. Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta coordinates.
            Default (0., 0., 0., 0.).
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates. Default (1., 1., 1., 1.).
        max_shape (tuple[int, int]): Maximum bounds for boxes, specifies
            (H, W). Default None.
        wh_ratio_clip (float): Maximum aspect ratio for boxes. Default
            16 / 1000.
        clip_border (bool, optional): Whether to clip the objects outside the
            border of the image. Default True.
        add_ctr_clamp (bool): Whether to add center clamp. When set to True,
            the center of the predicted bounding box will be clamped to
            avoid being too far away from the center of the anchor.
            Only used by YOLOF. Default False.
        ctr_clamp (int): The maximum pixel shift to clamp. Only used by
            YOLOF. Default 32.

    Returns:
        Tensor: Boxes with shape (N, num_classes * 4) or (N, 4), where 4
        represent tl_x, tl_y, br_x, br_y.
    """
    num_bboxes, num_classes = deltas.size(0), deltas.size(1) // 4
    if num_bboxes == 0:
        return deltas

    deltas = deltas.reshape(-1, 4)

    means = deltas.new_tensor(means).view(1, -1)
    stds = deltas.new_tensor(stds).view(1, -1)
    denorm_deltas = deltas * stds + means

    dxy = denorm_deltas[:, :2]
    dwh = denorm_deltas[:, 2:]

    # Compute width/height of each roi
    rois_ = rois.repeat(1, num_classes).reshape(-1, 4)
    # note: the -1 below matches GLIP's inclusive-pixel center convention
    pxy = ((rois_[:, :2] + rois_[:, 2:] - 1) * 0.5)
    pwh = (rois_[:, 2:] - rois_[:, :2])

    dxy_wh = pwh * dxy

    max_ratio = np.abs(np.log(wh_ratio_clip))
    if add_ctr_clamp:
        dxy_wh = torch.clamp(dxy_wh, max=ctr_clamp, min=-ctr_clamp)
        dwh = torch.clamp(dwh, max=max_ratio)
    else:
        dwh = dwh.clamp(min=-max_ratio, max=max_ratio)

    gxy = pxy + dxy_wh
    gwh = pwh * dwh.exp()
    x1y1 = gxy - (gwh - 1) * 0.5  # note: -1 for inclusive coordinates
    x2y2 = gxy + (gwh - 1) * 0.5  # note: -1 for inclusive coordinates
    bboxes = torch.cat([x1y1, x2y2], dim=-1)
    if clip_border and max_shape is not None:
        bboxes[..., 0::2].clamp_(min=0, max=max_shape[1] - 1)  # note
        bboxes[..., 1::2].clamp_(min=0, max=max_shape[0] - 1)  # note
    bboxes = bboxes.reshape(num_bboxes, -1)
    return bboxes
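

# Sketch contrasting GLIP decoding with the standard one (illustrative
# only; the helper name ``_demo_glip_convention`` is made up). With zero
# deltas, ``delta2bbox`` returns the roi unchanged, while
# ``delta2bbox_glip`` re-centers and re-sizes it under the inclusive-pixel
# convention: center (0 + 4 - 1) / 2 = 1.5, corners 1.5 -/+ (4 - 1) / 2.
def _demo_glip_convention() -> None:
    rois = torch.tensor([[0., 0., 4., 4.]])
    deltas = torch.zeros(1, 4)
    standard = delta2bbox(rois, deltas)   # tensor([[0., 0., 4., 4.]])
    glip = delta2bbox_glip(rois, deltas)  # tensor([[0., 0., 3., 3.]])
    assert torch.allclose(standard, rois)
    assert not torch.allclose(glip, standard)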