
    khHC              	       j   d Z 	 ddlmZmZmZ ddlZddlZddlm	Z	 ddl
mZmZ ddlmZmZmZmZmZ ddlmZ dZd	Zdd
efdZdej2                  dej2                  fdZ	 ddeej2                     deej2                     dedefdZdeeef   defdZ G d de	j<                        Z G d de       Z!y)z RetinaNet / EfficientDet Anchor Gen

Adapted for PyTorch from Tensorflow impl at
    https://github.com/google/automl/blob/6f6694cec1a48cdb33d5d1551a2d5db8ad227798/efficientdet/anchors.py

Hacked together by Ross Wightman, original copyright below
    )OptionalTupleSequenceN)batched_nmsremove_small_boxes)ArgMaxMatcherFasterRcnnBoxCoderBoxListIouSimilarityTargetAssigner   )batched_soft_nmsg      g     joutput_xyxyc                    |dddf   |dddf   z   dz  }|dddf   |dddf   z   dz  }|dddf   |dddf   z
  }|dddf   |dddf   z
  }| j                  d      \  }}}	}
t        j                  |
      |z  }t        j                  |	      |z  }||z  |z   }||z  |z   }||dz  z
  }||dz  z
  }||dz  z   }||dz  z   }|rt        j                  ||||gd      }|S t        j                  ||||gd      }|S )a  Transforms relative regression coordinates to absolute positions.

    Network predictions are normalized and relative to a given anchor; this
    reverses the transformation and outputs absolute coordinates for the input image.

    Args:
        rel_codes: box regression targets.

        anchors: anchors on all feature levels.

    Returns:
        outputs: bounding boxes.

    Nr      r      dim       @)unbindtorchexpstack)	rel_codesanchorsr   	ycenter_a	xcenter_ahawatytxthtwwhycenterxcenteryminxminymaxxmaxouts                       J/var/www/teggl/fontify/venv/lib/python3.12/site-packages/effdet/anchors.pydecode_box_outputsr.   -   sL    AA.!3IAA.!3I	AA	&B	AA	&B%%!%,NBB		"A		"A2g	!G2g	!GQVDQVDQVDQVDkk4tT2: J kk4tT2:J    boxessizec                 ~    | j                  d      } t        j                  ||gd      }| j                  |      } | S )Nr   )minr   )clampr   catr3   )r0   r1   s     r-   clip_boxes_xyxyr6   R   s8    KKAKE99dD\q)DIIdOELr/   	img_scaleimg_sizemax_det_per_imagesoft_nmsc	                    |j                   d   dk(  sJ |j                   d   dk(  sJ | j                   d   dk(  sJ ||ddf   }t        |j                         |d      }	||t        |	||z        }	| j	                         j                  d      j                         }
|rt        |	|
|ddd	      \  }}||
|<   nt        |	|
|d
      }|d| }|	|   }	|
|df   }
||df   dz   }||	|z  }	t        |      }t        j                  |	|
|j                         gd      }||k  rHt        j                  |t        j                  ||z
  df|j                  |j                        gd      }|S )ax  Generates detections with RetinaNet model outputs and anchors.

    Args:
        cls_outputs: a torch tensor with shape [N, 1], which has the highest class
            scores on all feature levels. The N is the number of selected
            top-K total anchors on all levels.

        box_outputs: a torch tensor with shape [N, 4], which stacks box regression
            outputs on all feature levels. The N is the number of selected top-k
            total anchors on all levels.

        anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
            feature levels. The N is the number of selected top-k total anchors on all levels.

        indices: a torch tensor with shape [N], which is the indices from top-k selection.

        classes: a torch tensor with shape [N], which represents the class
            prediction on all selected anchors from top-k selection.

        img_scale: a float tensor representing the scale between original image
            and input image for the detector. It is used to rescale detections for
            evaluating with the original groundtruth annotations.

        max_det_per_image: an int constant, added as argument to make torchscript happy

    Returns:
        detections: detection results in a tensor with shape [max_det_per_image, 6],
            each row representing [x_min, y_min, x_max, y_max, score, class]
       r   NT)r   g333333?gMbP?)method_gaussianiou_thresholdscore_threshold      ?)r?   r      )devicedtyper   )shaper.   floatr6   sigmoidsqueezer   r   lenr   r5   zerosrC   rD   )cls_outputsbox_outputsanchor_boxesindicesclassesr7   r8   r9   r:   r0   scorestop_detection_idxsoft_scoresnum_det
detectionss                  r-   generate_detectionsrU   Y   s   B R A%%%b!Q&&&R A%%%
+L {002LdSE!5x)';<  "**1-335F)967D]a*c&;$/ !'vwcR **<+<=#$E%t+,F'-.2G	! #$GE67==?;CJ""YYKK*W4a8ARARZdZjZjk 
 
 r/   
image_size	max_levelc                     | }|g}t        d|dz         D ]/  }|d   dz
  dz  dz   |d   dz
  dz  dz   f}|j                  |       1 |S )zGet feat widths and heights for all levels.
    Args:
      image_size: a tuple (H, W)
      max_level: maximum feature level.
    Returns:
      feat_sizes: a list of tuples (height, width) for each level.
    r   r   r   )rangeappend)rV   rW   	feat_size
feat_sizes_s        r-   get_feat_sizesr^      sp     IJ1i!m$ %lQ&1,q09Q<!3C2IA2MN	)$% r/   c                   T     e Zd ZdZdeeef   f fdZed        Zd Z	d Z
d Z xZS )AnchorszRetinaNet Anchors class.rV   c                    t         t        |           || _        || _        || _        || _        t        |t              rt        |      ||z
  dz   k(  sJ || _
        n|g||z
  dz   z  | _
        t        |t              rt        |      dk(  sJ t        |      | _        t        ||      | _        | j                         | _        | j#                  d| j%                                y)a  Constructs multiscale RetinaNet anchors.

        Args:
            min_level: integer number of minimum level of the output feature pyramid.

            max_level: integer number of maximum level of the output feature pyramid.

            num_scales: integer number representing intermediate scales added
                on each level. For instances, num_scales=2 adds two additional
                anchor scales [2^0, 2^0.5] on each level.

            aspect_ratios: list of tuples representing the aspect ratio anchors added
                on each level. For instances, aspect_ratios =
                [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.

            anchor_scale: float number representing the scale of size of the base
                anchor to the feature stride 2^level.

            image_size: Sequence specifying input image size of model (H, W).
                The image_size should be divided by the largest feature stride 2^max_level.
        r   r   r0   N)superr`   __init__	min_levelrW   
num_scalesaspect_ratios
isinstancer   rI   anchor_scalestuplerV   r^   r\   _generate_configsconfigregister_buffer_generate_boxes)selfrd   rW   re   rf   anchor_scalerV   	__class__s          r-   rc   zAnchors.__init__   s    , 	gt%'""$*lH-|$	I(=(AAAA!-D".9y3H13L!MD*h/C
Oq4HHH
+(Y?,,.Wd&:&:&<=r/   c                      | |j                   |j                  |j                  |j                  |j                  |j
                        S N)rd   rW   re   rf   ro   rV   )clsrk   s     r-   from_configzAnchors.from_config   sB    f..v33!2!24 	4r/   c                    i }| j                   }t        | j                  | j                  dz         D ]  }g ||<   t        | j                        D ]  }| j
                  D ]|  }||   j                  |d   d   t        ||   d         z  |d   d   t        ||   d         z  f|t        | j                        z  || j                  || j                  z
     f       ~   |S )z(Generate configurations of anchor boxes.r   r   )	r\   rY   rd   rW   re   rf   rZ   rF   rh   )rn   anchor_configsr\   levelscale_octaveaspects         r-   rj   zAnchors._generate_configs   s   __
4>>4>>A+=> 	FE$&N5! %doo 6 F"00 FF"5)00$Q-*U:e3DQ3G-HH$Q-*U:e3DQ3G-HHJ%doo(>>++EDNN,BCEFFF	F r/   c           	         g }| j                   j                         D ]  \  }}g }|D ]]  }|\  }}}}	|	|d   z  d|z  z  }
|	|d   z  d|z  z  }t        |t              r|\  }}nt	        j
                  |      }d|z  }|
|z  dz  }||z  dz  }t	        j                  |d   dz  | j                  d   |d         }t	        j                  |d   dz  | j                  d   |d         }t	        j                  ||      \  }}|j                  d      }|j                  d      }t	        j                  ||z
  ||z
  ||z   ||z   f      }t	        j                  |dd      }|j                  t	        j                  |d             ` t	        j                  |d      }|j                  |j                  ddg              t	        j                  |      }t        j                   |      j#                         }|S )	z#Generates multi-scale anchor boxes.r   r   r   g      ?r   r<   )axisr=   )rk   itemsrg   r   npsqrtarangerV   meshgridreshapevstackswapaxesrZ   expand_dimsconcatenater   
from_numpyrF   )rn   	boxes_allr]   configsboxes_levelrk   strideoctave_scalery   ro   base_anchor_size_xbase_anchor_size_yaspect_xaspect_yanchor_size_x_2anchor_size_y_2xyxvyvr0   rM   s                         r-   rm   zAnchors._generate_boxes   s   	++++- 	;JAwK! B=C:fl%1F1I%=\@Q%Q"%1F1I%=\@Q%Q"fh/)/&Hh!wwvH"X~H"4x"?#"E"4x"?#"EIIfQi!mT__Q-?KIIfQi!mT__Q-?KQ*BZZ^ZZ^		2#7o9M#%#7o9M#O PE1a0""2>>%a#@A+B0 ..1=K[00"a9:7	;: yy+''5;;=r/   c                 F    | j                   t        | j                        z  S rr   )re   rI   rf   )rn   s    r-   get_anchors_per_locationz Anchors.get_anchors_per_location  s    T%7%7!888r/   )__name__
__module____qualname____doc__r   intrc   classmethodrt   rj   rm   r   __classcell__)rp   s   @r-   r`   r`      sD    "%>bghkmphpbq %>N 4 4"H9r/   r`   c                   2    e Zd ZdZddedefdZd	dZd	dZy)
AnchorLabelerz)Labeler for multiscale anchor boxes.
    num_classesmatch_thresholdc                     t               }t        ||dd      }t               }t        |||      | _        || _        || _        || _        i | _        y)af  Constructs anchor labeler to assign labels to anchors.

        Args:
            anchors: an instance of class Anchors.

            num_classes: integer number representing number of classes in the dataset.

            match_threshold: float number between 0 and 1 representing the threshold
                to assign positive labels for anchors.
        T)unmatched_thresholdnegatives_lower_than_unmatchedforce_match_for_each_rowN)	r   r   r	   r   target_assignerr   r   r   indices_cache)rn   r   r   r   similarity_calcmatcher	box_coders          r-   rc   zAnchorLabeler.__init__!  s[     (/ /+/%)	+
 '(	-ow	R.&r/   c                    g }g }|r|dkD  }||   }||   }| j                   j                  t        | j                  j                        t        |      |      \  }}}	|dz
  j                         }	 d}
t        | j                  j                  | j                  j                  dz         D ]  }| j                  j                  |   }|d   |d   z  | j                  j                         z  }|j                  ||
|
|z    j                  |d   |d   dg             |j                  ||
|
|z    j                  |d   |d   dg             |
|z  }
 |	j                  dkD  j                         j                         }|||fS )aH  Labels anchors with ground truth inputs.

        Args:
            gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
                For each row, it stores [y0, x0, y1, x1] for four corners of a box.

            gt_classes: A integer tensor with shape [N, 1] representing groundtruth classes.

            filter_valid: Filter out any boxes w/ gt class <= -1 before assigning

        Returns:
            cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level].
                The values are tensor with shape [height_l, width_l, num_anchors]. The height_l and width_l
                represent the dimension of class logits at l-th level.

            box_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level].
                The values are tensor with shape [height_l, width_l, num_anchors * 4]. The height_l and
                width_l represent the dimension of bounding box regression output at l-th level.

            num_positives: scalar tensor storing number of positives in an image.
        r<   r   r   )r   assignr
   r   r0   longrY   rd   rW   r\   r   rZ   r   match_resultsrF   sum)rn   gt_boxes
gt_classesfilter_validcls_targets_outbox_targets_out	valid_idxcls_targetsbox_targetsmatchescountrw   r[   stepsnum_positivess                  r-   label_anchorszAnchorLabeler.label_anchors:  s   , "RI	*H#I.J,0,@,@,G,GDLL&&'):J-H)[' #Q,,. 	@4<<114<<3I3IA3MN 	E//6IaL9Q</$,,2W2W2YYE"";uUU]#C#K#KYWX\[def[gikLl#mn"";uUU]#C#K#KYWX\[def[gikLl#mnUNE	 !..3::<@@B>>r/   c           	      z   t        |      }|t        |      k(  sJ | j                  j                  | j                  j                  z
  dz   }t	        |      D cg c]  }g  }}t	        |      D cg c]  }g  }}g }	t        | j                  j                        }
t	        |      D ]  }||dz
  k(  }|r"||   dkD  }t        ||   |         }||   |   }nt        ||         }||   }| j                  j                  |
||      \  }}}|dz
  j                         }	 d}t	        | j                  j                  | j                  j                  dz         D ]  }|| j                  j                  z
  }| j                  j                  |   }|d   |d   z  | j                  j                         z  }||   j                  ||||z    j                  |d   |d   dg             ||   j                  ||||z    j                  |d   |d   dg             ||z  }|st        j                  ||         ||<   t        j                  ||         ||<    |	j                  |j                   dkD  j#                         j%                                |st        j                  |	      }	 |||	fS c c}w c c}w )Nr   r<   r   )rI   r   rW   rd   rY   r
   r0   r   r   r   r\   r   rZ   r   r   r   r   rF   r   )rn   r   r   r   
batch_size
num_levelsr]   r   r   num_positives_outanchor_box_listilast_sampler   gt_box_list
gt_class_ir   r   r   r   rw   	level_idxr[   r   s                           r-   batch_label_anchorsz!AnchorLabeler.batch_label_anchorsl  s   ]
S_,,,\\++dll.D.DDqH
',Z'89!299',Z'89!299!$,,"4"45z" !	CAzA~-K&qMB.	%hqk)&<=']95
%hqk2']
040D0D0K0KO]hjt0u-Kg '?002K DEt||55t||7M7MPQ7QR Y!DLL$:$::	 LL33E:	!!y|3dll6[6[6]]	*11eem4<<ilIVWLZ\=]^`	*11eem4<<ilIVWLZ\=]^`16_Y=W1XOI.16_Y=W1XOI.Y $$g&;&;b&@%G%G%I%M%M%OP$)KK0A$B!C!	CF 1BBBQ :9s   	J31	J8N)rA   )T)	r   r   r   r   r   rF   rc   r   r    r/   r-   r   r     s&     S  5  20?d,Cr/   r   )F)d   F)"r   typingr   r   r   numpyr}   r   torch.nnnntorchvision.ops.boxesr   r   effdet.object_detectionr   r	   r
   r   r   r:   r   MIN_CLASS_SCORE_DUMMY_DETECTION_SCOREboolr.   Tensorr6   r   rU   r^   Moduler`   objectr   r   r/   r-   <module>r      s   *
 - ,    A m m &   " "J5<< u||  8=IELL)I5=ell5KI I 15IXuS#X 3  e9bii e9P{CF {Cr/   