o
    Zh	>                     @   s   d dl Z d dlmZ d dlm  mZ ddlmZ ddlm	Z	 ddl
mZmZ e r0d dlmZ e jjdd	 Ze jj	dd
dZdede jdede jfddZde jdedede jfddZdddZG dd deZ							dddZdS )    N   )is_vision_available   )box_iou)RTDetrHungarianMatcher
RTDetrLoss)center_to_corners_formatc                 C   s   dd t | |D S )Nc                 S   s   g | ]	\}}||d qS ))logits
pred_boxes ).0abr   r   L/var/www/auris/lib/python3.10/site-packages/transformers/loss/loss_d_fine.py
<listcomp>$       z!_set_aux_loss.<locals>.<listcomp>zip)outputs_classoutputs_coordr   r   r   _set_aux_loss   s   r   c                    s    fddt | |||D S )Nc              	      s&   g | ]\}}}}|||| d qS ))r	   r
   pred_corners
ref_pointsteacher_cornersteacher_logitsr   )r   r   r   cdr   r   r   r   r   .   s    
	z"_set_aux_loss2.<locals>.<listcomp>r   )r   r   Zoutputs_cornersZoutputs_refr   r   r   r   r   _set_aux_loss2'   s   	r   max_num_binsup	reg_scalereturnc                    s   t |d t | }t |d t | d }|d d| d     fddt| d d ddD } fddtd| d D }| g| t|d d g | |g }d	d |D }t|d}|S )
uK  
    Generates the non-uniform Weighting Function W(n) for bounding box regression.

    Args:
        max_num_bins (int): Max number of the discrete bins.
        up (Tensor): Controls upper bounds of the sequence,
                     where maximum offset is ±up * H / W.
        reg_scale (float): Controls the curvature of the Weighting Function.
                           Larger values result in flatter weights near the central axis W(max_num_bins/2)=0
                           and steeper weights at both ends.
    Returns:
        Tensor: Sequence of Weighting Function.
    r   r   r   c                    s   g | ]	} |  d  qS r   r   r   istepr   r   r   L   r   z&weighting_function.<locals>.<listcomp>c                    s   g | ]} | d  qS r#   r   r$   r&   r   r   r   M   s    Nc                 S   s&   g | ]}|  d kr|n|d qS )r   )dim	unsqueeze)r   vr   r   r   r   O   s   & )absrangetorch
zeros_likecat)r   r    r!   Zupper_bound1Zupper_bound2left_valuesright_valuesvaluesr   r&   r   weighting_function;   s   "*r4   gtc                 C   s"  |  d} t|||}|d| d }|dk}tj|ddd }| }t|}	t|}
|dk||k @ }||  }|| }||d  }t| | | }t|| |  }|||  |	|< d|	|  |
|< |dk }d|	|< d|
|< d||< ||k}d|	|< d|
|< |d ||< ||	|
fS )a	  
    Decodes bounding box ground truth (GT) values into distribution-based GT representations.

    This function maps continuous GT values into discrete distribution bins, which can be used
    for regression tasks in object detection models. It calculates the indices of the closest
    bins to each GT value and assigns interpolation weights to these bins based on their proximity
    to the GT value.

    Args:
        gt (Tensor): Ground truth bounding box values, shape (N, ).
        max_num_bins (int): Maximum number of discrete bins for the distribution.
        reg_scale (float): Controls the curvature of the Weighting Function.
        up (Tensor): Controls the upper bounds of the Weighting Function.

    Returns:
        Tuple[Tensor, Tensor, Tensor]:
            - indices (Tensor): Index of the left bin closest to each GT value, shape (N, ).
            - weight_right (Tensor): Weight assigned to the right bin, shape (N, ).
            - weight_left (Tensor): Weight assigned to the left bin, shape (N, ).
    r(   r   r   r)   g      ?g        皙?)	reshaper4   r*   r.   sumfloatr/   longr,   )r5   r   r!   r    Zfunction_valuesZdiffsmaskZclosest_left_indicesindicesweight_rightweight_leftZvalid_idx_maskZvalid_indicesr1   r2   Z
left_diffsZright_diffsZinvalid_idx_mask_negZinvalid_idx_mask_posr   r   r   translate_gtT   s2   



r@   r7   c                 C   sF  t |}| dddf |dddf  | d | d  d|  }| dddf |dddf  | d | d  d|  }|dddf | dddf  | d | d  d|  }|ddd	f | dddf  | d | d  d|  }	t||||	gd
}
t|
|||\}
}}|dur|
jd|| d}
|
d
 | | fS )a  
    Converts bounding box coordinates to distances from a reference point.

    Args:
        points (Tensor): (n, 4) [x, y, w, h], where (x, y) is the center.
        bbox (Tensor): (n, 4) bounding boxes in "xyxy" format.
        max_num_bins (float): Maximum bin value.
        reg_scale (float): Controlling curvarture of W(n).
        up (Tensor): Controlling upper bounds of W(n).
        eps (float): Small value to ensure target < max_num_bins.

    Returns:
        Tensor: Decoded distances.
    Nr   ).r   gؗҜ<      ?r   ).   r   rB   r(   minmax)r,   r.   stackr@   clampr8   detach)ZpointsZbboxr   r!   r    ZepslefttoprightbottomZ	four_lensr>   r?   r   r   r   bbox2distance   s   8888rM   c                       s>   e Zd ZdZ fddZ	dddZdd	d
Zdd Z  ZS )	DFineLossa  
    This class computes the losses for D-FINE. The process happens in two steps: 1) we compute hungarian assignment
    between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth /
    prediction (supervise class and box).

    Args:
        matcher (`DetrHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        weight_dict (`Dict`):
            Dictionary relating each loss with its weights. These losses are configured in DFineConf as
            `weight_loss_vfl`, `weight_loss_bbox`, `weight_loss_giou`, `weight_loss_fgl`, `weight_loss_ddf`
        losses (`List[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
        alpha (`float`):
            Parameter alpha used to compute the focal loss.
        gamma (`float`):
            Parameter gamma used to compute the focal loss.
        eos_coef (`float`):
            Relative classification weight applied to the no-object category.
        num_classes (`int`):
            Number of object categories, omitting the special no-object category.
    c                    sj   t  | t|| _|j| _|j|j|j|j|j	d| _
g d| _|j| _tjt|jgdd| _d S )N)Zloss_vflZ	loss_bboxZ	loss_giouloss_fglloss_ddf)vflboxeslocalF)Zrequires_grad)super__init__r   Zmatcherr   Zweight_loss_vflZweight_loss_bboxZweight_loss_giouZweight_loss_fglZweight_loss_ddfZweight_dictlossesr!   nn	Parameterr.   Ztensorr    )selfconfig	__class__r   r   rU      s   

zDFineLoss.__init__Nr9   c                 C   s   |  }|d }	tj||dd|d tj||	dd|d  }
|d ur.| }|
| }
|d ur:|
 | }
|
S |dkrD|
 }
|
S |dkrL|
 }
|
S )Nr   none	reductionr(   meanr9   )r;   FZcross_entropyr8   r:   r9   r`   )rY   predlabelr>   r?   weightr_   
avg_factorZdis_leftZ	dis_rightlossr   r   r    unimodal_distribution_focal_loss   s&   z*DFineLoss.unimodal_distribution_focal_loss   c                 C   s  i }d|v rZ|  |}tjdd t||D dd}|d | d| jd }	|d |  }
t  t|
t	|| j| j
| j| _W d	   n1 sMw   Y  | j\}}}ttt	|d
 | t	|d }|ddddd }| j|	|||||d|d< |d d| jd }	|d d| jd }t|	|r|	 d |d< |S |d  jddd }tj|tjd}d||< |ddddd}||| |j||< |ddddd }||d  tjddtj|	| ddtj| | ddd }d|d
 j d  }| | d |  | d | _!| _"|# r7|| $ nd}| # rF||  $ nd}|| j! || j"  | j!| j"  |d< |S )zaCompute Fine-Grained Localization (FGL) Loss
        and Decoupled Distillation Focal (DDF) Loss.r   c                 S   s    g | ]\}\}}|d  | qS )rR   r   )r   t_r%   r   r   r   r      s     z(DFineLoss.loss_local.<locals>.<listcomp>r   r6   r(   r   r   Nr
      )re   rO   r   rP   r   )dtypeTr   r]   r^   rA   )%Z_get_source_permutation_idxr.   r0   r   r8   r   rH   Zno_gradrM   r   r!   r    Zfgl_targetsZdiagr   r*   repeatrg   equalr9   ZsigmoidrE   r/   boolZ
reshape_astorl   rW   Z	KLDivLossra   Zlog_softmaxZsoftmaxshapenum_posZnum_neganyr`   )rY   outputstargetsr=   	num_boxesTrV   idxZtarget_boxesr   r   Ztarget_cornersr>   r?   ZiousZweight_targetsZweight_targets_localr<   Zloss_match_localZbatch_scaleZloss_match_local1Zloss_match_local2r   r   r   
loss_local   sz   


	
	 


zDFineLoss.loss_localc                 C   sD   | j | j| j| j| jd}||vrtd| d|| ||||S )N)ZcardinalityrS   rR   ZfocalrQ   zLoss z not supported)Zloss_cardinalityry   Z
loss_boxesZloss_labels_focalZloss_labels_vfl
ValueError)rY   rf   rt   ru   r=   rv   Zloss_mapr   r   r   get_loss9  s   zDFineLoss.get_loss)Nr9   N)rh   )	__name__
__module____qualname____doc__rU   rg   ry   r{   __classcell__r   r   r[   r   rN      s    

KrN   c              	   K   s  t |}|| i }| |d< |jddd|d< d }|jr|	d urtj|jddd|	d dd\}}tj||	d dd\}}tj|
|	d dd\}}tj||	d dd\}}t|d d d d	f dd|d d d d	f dd|d d d d	f dd|d d d d	f dd|d d d	f |d d d	f }||d
< |d
 t	|g|jdddg t|dd|dd|dd|dd|d d d	f |d d d	f }||d< |	|d< |||}t
| }|||fS )Nr	   r   r   rC   r
   Zdn_num_splitr   r6   r(   auxiliary_outputsdn_auxiliary_outputsdenoising_meta_values)rN   rp   rG   Zauxiliary_lossr.   splitr   Z	transposeextendr   r9   r3   )r	   labelsZdevicer
   rZ   r   r   Zenc_topk_logitsZenc_topk_bboxesr   Zpredicted_cornersZinitial_reference_pointskwargs	criterionZoutputs_lossr   Zdn_out_coordZdn_out_classZdn_out_cornersZout_cornersZdn_out_refsZout_refsr   Z	loss_dictrf   r   r   r   DFineForObjectDetectionLossF  sN   

	





r   )NN)r7   )NNNNNNN)r.   Ztorch.nnrW   Ztorch.nn.functionalZ
functionalra   utilsr   Zloss_for_object_detectionr   Zloss_rt_detrr   r   Ztransformers.image_transformsr   ZjitZunusedr   r   intZTensorr4   r@   rM   rN   r   r   r   r   r   <module>   s4   

? 