
    eTh+                         S SK r S SKJr  SSKJr  SSKJr  SSKJrJ	r	J
r
Jr  \" 5       (       a  S SKJr    SS\ R                  S	\ R                  S
\S\S\4
S jjr " S S\5      r " S S\	5      r    SS jrg)    N   )center_to_corners_format)is_scipy_available   )HungarianMatcher	ImageLoss_set_aux_lossgeneralized_box_iou)linear_sum_assignmentinputstargets	num_boxesalphagammac                     U R                  5       n[        R                  R                  XSS9nXQ-  SU-
  SU-
  -  -   nUSU-
  U-  -  nUS:  a  X1-  SU-
  SU-
  -  -   n	X-  nUR	                  5       U-  $ )a
  
Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.

Args:
    inputs (`torch.FloatTensor` of arbitrary shape):
        The predictions for each example.
    targets (`torch.FloatTensor` with the same shape as `inputs`)
        A tensor storing the binary classification label for each element in the `inputs` (0 for the negative class
        and 1 for the positive class).
    num_boxes (`int`):
        The total number of boxes in the batch.
    alpha (`float`, *optional*, defaults to 0.25):
        Optional weighting factor in the range (0,1) to balance positive vs. negative examples.
    gamma (`int`, *optional*, defaults to 2):
        Exponent of the modulating factor (1 - p_t) to balance easy vs hard examples.

Returns:
    Loss tensor
none)	reductionr   r   )sigmoidnn
functional binary_cross_entropy_with_logitssum)
r   r   r   r   r   probce_lossp_tlossalpha_ts
             ]/var/www/auris/envauris/lib/python3.13/site-packages/transformers/loss/loss_grounding_dino.pysigmoid_focal_lossr      s    4 >>Dmm<<VX^<_G
.AHW5
5Cq3w5()Dz/QY1w;$??~88:	!!    c                   B    \ rS rSr\R
                  " 5       S 5       rSrg)GroundingDinoHungarianMatcherC   c           
         US   R                   SS u  p4US   R                  SS5      R                  5       nUS   R                  SS5      nUS   n[        R                  " [        Xr5       VV	s/ s H  u  pXS      PM     sn	n5      nXwR                  S	S
S9-  n[        R                  " U V
s/ s H  oS   PM	     sn
5      nSnSnSU-
  X]-  -  SU-
  S-   R                  5       * -  nUSU-
  U-  -  US-   R                  5       * -  nX-
  UR                  5       -  n[        R                  " XkSS9n[        [        U5      [        U5      5      * nU R                  U-  U R                  U-  -   U R                  U-  -   nUR                  X4S	5      R!                  5       nU V
s/ s H  n
[#        U
S   5      PM     nn
[%        UR'                  US	5      5       VVs/ s H  u  nn[)        UU   5      PM     nnnU VVs/ s HL  u  nn[        R*                  " U[        R,                  S9[        R*                  " U[        R,                  S94PMN     snn$ s  sn	nf s  sn
f s  sn
f s  snnf s  snnf )ad  
Args:
    outputs (`dict`):
        A dictionary that contains at least these entries:
        * "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
        * "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates.
        * "label_maps": Tuple of tensors of dim [num_classes, hidden_dim].
    targets (`List[dict]`):
        A list of targets (len(targets) = batch_size), where each target is a dict containing:
        * "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of
          ground-truth
         objects in the target) containing the class labels
        * "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates.

Returns:
    `List[Tuple]`: A list of size `batch_size`, containing tuples of (index_i, index_j) where:
    - index_i is the indices of the selected predictions (in order)
    - index_j is the indices of the corresponding selected targets (in order)
    For each batch element, it holds: len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
logitsNr   r   r   
pred_boxes
label_mapsclass_labelsT)dimkeepdimboxes      ?       @g:0yE>)p)dtype)shapeflattenr   torchcatzipr   logtcdistr
   r   	bbox_cost
class_cost	giou_costviewcpulen	enumeratesplitr   	as_tensorint64)selfoutputsr   
batch_sizenum_queriesout_probout_bboxr'   	label_maptargetvtarget_bboxr   r   neg_cost_classpos_cost_classr:   r9   r;   cost_matrixsizesicindicesjs                            r   forward%GroundingDinoHungarianMatcher.forwardD   sd   , #*("3"9"9"1"=
 8$,,Q2::<<(00A6\*
 YY[^_i[st[sFWi	*@ A[stu
..R."FF
 iiW =W7W => e)8a(lT>Q=V=V=X<XY1x<E"9:4?T?T?V>VW$5G
 KK;	 ))A()KMefqMrss	 nny04??Z3OORVR`R`clRll!&&zCGGI*12'QQwZ'2;D[EVEVW\^`Ea;bc;b41a(1.;bckrskrcgcdfg%++6QVQ\Q\8]^krss7  u
 !>( 3css   0I
1I (I% I*AI0 N)__name__
__module____qualname____firstlineno__r3   no_gradrU   __static_attributes__rW   r    r   r"   r"   C   s    
]]_8t 8tr    r"   c                   *    \ rS rSrSrS rS rS rSrg)GroundingDinoImageLoss   aZ  
This class computes the losses for `GroundingDinoForObjectDetection`. The process happens in two steps: 1) we
compute hungarian assignment between ground truth boxes and the outputs of the model 2) we supervise each pair of
matched ground-truth / prediction (supervise class and box).

Args:
    matcher (`GroundingDinoHungarianMatcher`):
        Module able to compute a matching between targets and proposals.
    focal_alpha (`float`):
        Alpha parameter in focal loss.
    losses (`List[str]`):
        List of all the losses to be applied. See `get_loss` for a list of all available losses.
c                 f    [         R                  R                  U 5        Xl        X l        X0l        g N)r   Module__init__matcherfocal_alphalosses)rC   re   rf   rg   s       r   rd   GroundingDinoImageLoss.__init__   s$    
		4 &r    c                    US   n[         R                  " [        [        X#5      5       VVVVs/ s H3  u  nu  nu  pxUS:  a  US   U   [	        US   U   5      -   OUS   U   PM5     snnnn5      n	[         R                  " US   SS9n
U R                  U5      n[         R                  " XDR                  [         R                  S9nX   R                  [         R                  5      X'   U$ s  snnnnf )z.
Create one_hot based on the matching indices
r%   r   r(   r'   )r*   )devicer0   )
r3   r4   r?   r5   r>   _get_source_permutation_idx
zeros_likerj   longto)rC   rD   r   rS   r%   rQ   rJ   _Jr(   r'   idxtarget_classes_onehots                r   _get_target_classes_one_hot2GroundingDinoImageLoss._get_target_classes_one_hot   s     "yy ,5S5J+K+K'A' NOQRU~&q)C0Ea0H,IIX^_mXnopXqq+K
 YYw|4!<
..w7 % 0 0UZU_U_ `%/%=%@%@%L"$$s   :C.c                 0   SU;  a  [        S5      eSU;  a  [        S5      eU R                  XU5      nUS   nUS   n[        R                  " Xg5      n[        R                  " XW5      nUR	                  5       n[        UUUU R                  SS9nSU0n	U	$ )z
Classification loss (Binary focal loss) targets dicts must contain the key "class_labels" containing a tensor
of dim [nb_target_boxes]
r%   z#No logits were found in the outputs	text_maskz&No text_mask were found in the outputsr   )r   r   r   r   r   loss_ce)KeyErrorrs   r3   masked_selectfloatr   rf   )
rC   rD   r   rS   r   rr   source_logitsrv   rw   rg   s
             r   loss_labels"GroundingDinoImageLoss.loss_labels   s    
 7"@AAg%CDD $ @ @SZ [)K(	 ++ME % 3 34I U 5 ; ; =$ )""
 W%r    )rf   rg   re   N)	rX   rY   rZ   r[   __doc__rd   rs   r|   r]   rW   r    r   r_   r_      s    %(r    r_   c           
      >  ^^ [        UR                  UR                  UR                  S9n/ SQn[	        UUR
                  US9nUR                  U5        0 nXS'   X>S'   X^S'   XnS'   S nUR                  (       a"  [        Xx5      nU H  nUUS'   UUS'   M     XS'   U" X5      mUR                  (       aJ  U	U
UUS	.nU" UU5      nUR                  5        VVs0 s H  u  nnUS
-   U_M     nnnTR                  U5        SUR                  UR                  S.mUR                  (       a:  TR                  5        VVs0 s H  u  nnUS
-   U_M     nnnTR                  U5        UR                  (       an  0 n[        UR                  S-
  5       H?  nUR                  TR                  5        VVs0 s H  u  nnUSU 3-   U_M     snn5        MA     TR                  U5        [!        UU4S jTR#                  5        5       5      nUTU4$ s  snnf s  snnf s  snnf )N)r:   r9   r;   )labelsr,   cardinality)re   rf   rg   r%   r&   r'   rv   auxiliary_outputs)r%   r&   r'   rv   _encr.   )rw   	loss_bbox	loss_giour   ro   c              3   H   >#    U  H  oT;   d  M
  TU   TU   -  v   M     g 7frb   rW   ).0k	loss_dictweight_dicts     r   	<genexpr>6GroundingDinoForObjectDetectionLoss.<locals>.<genexpr>  s(     [6F{JZ,y|k!n,6Fs   	"")r"   r:   r9   r;   r_   rf   rn   auxiliary_lossr	   	two_stageitemsupdatebbox_loss_coefficientgiou_loss_coefficientrangedecoder_layersr   keys)r%   r   rj   r&   configr'   rv   outputs_classoutputs_coordencoder_logitsencoder_pred_boxesre   rg   	criterionoutputs_lossr   
aux_outputencoder_outputs_lossencoder_loss_dictr   rK   enc_weight_dictaux_weight_dictrQ   r   r   r   s                            @@r   #GroundingDinoForObjectDetectionLossr      s9    ,$$0@0@FL\L\G 0F&&&I
 LLL#!+!+ ))-G+J'1J|$&/J{# , ->(),/I$,$"	 
 &&:FC7H7N7N7PQ7Ptq!QZ]7PQ*+ 1111K 5@5F5F5HI5HTQ1v:q=5HI?+v,,q01A""{?P?P?R#S?Rtq!A!A3KN?R#ST 2?+[inn6F[[D---) R J $Ts   H>H1H)r-   r   )NNNN)r3   torch.nnr   image_transformsr   utilsr   loss_for_object_detectionr   r   r	   r
   scipy.optimizer   Tensorintrz   r   r"   r_   r   rW   r    r   <module>r      s      7 & f f 4 $"LL$"\\$" $" 	$"
 $"N:t$4 :tzFY Fb F.r    