o
    ZhrV                     @   s   d dl Z d dlmZ d dlm  mZ ddlmZmZm	Z	 ddl
mZmZmZmZmZ e r4d dlmZ e r=d dlmZ e jjdd	 ZG d
d dejZG dd dejZ					dddZdS )    N   )is_scipy_availableis_vision_availablerequires_backends   )box_iou	dice_lossgeneralized_box_iounested_tensor_from_tensor_listsigmoid_focal_losslinear_sum_assignment)center_to_corners_formatc                 C   s   dd t | |D S )Nc                 S   s   g | ]	\}}||d qS ))logits
pred_boxes ).0abr   r   M/var/www/auris/lib/python3.10/site-packages/transformers/loss/loss_rt_detr.py
<listcomp>+   s    z!_set_aux_loss.<locals>.<listcomp>)zip)outputs_classoutputs_coordr   r   r   _set_aux_loss&   s   r   c                       s0   e Zd ZdZ fddZe dd Z  ZS )RTDetrHungarianMatchera  This class computes an assignment between the targets and the predictions of the network

    For efficiency reasons, the targets don't include the no_object. Because of this, in general, there are more
    predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, while the others are
    un-matched (and thus treated as non-objects).

    Args:
        config: RTDetrConfig
    c                    sz   t    t| dg |j| _|j| _|j| _|j	| _	|j
| _|j| _| j| j  kr8| j  kr8dkr;td d S d S )NZscipyr   z#All costs of the Matcher can't be 0)super__init__r   Zmatcher_class_cost
class_costZmatcher_bbox_cost	bbox_costZmatcher_giou_cost	giou_costuse_focal_lossZmatcher_alphaalphaZmatcher_gammagamma
ValueError)selfconfig	__class__r   r   r   9   s   
"
zRTDetrHungarianMatcher.__init__c                 C   s~  |d j dd \}}|d dd}tdd |D }td	d |D }| jrft|d dd}|dd|f }d| j || j  d| d
 	   }	| jd| | j  |d
 	   }
|
|	 }n|d dd
d}|dd|f  }tj||dd}tt|t| }| j| | j|  | j|  }|||d }dd |D }dd t||dD }dd |D S )a  Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        r   Nr   r   r   r   c                 S      g | ]}|d  qS class_labelsr   r   vr   r   r   r   b       z2RTDetrHungarianMatcher.forward.<locals>.<listcomp>c                 S   r)   boxesr   r,   r   r   r   r   c   r.   g:0yE>)pc                 S      g | ]}t |d  qS r/   lenr,   r   r   r   r   y       c                 S   s   g | ]
\}}t || qS r   r   )r   icr   r   r   r   z   s    c                 S   s0   g | ]\}}t j|t jd t j|t jd fqS )dtype)torch	as_tensorint64)r   r7   jr   r   r   r   |   s   0 )shapeflattenr;   catr!   Fsigmoidr"   r#   logZsoftmaxZcdistr	   r   r   r   r    viewcpu	enumeratesplit)r%   outputstargetsZ
batch_sizeZnum_queriesZout_bboxZ
target_idsZtarget_bboxZout_probZneg_cost_classZpos_cost_classr   r   r    Zcost_matrixsizesindicesr   r   r   forwardH   s&   &"
zRTDetrHungarianMatcher.forward)	__name__
__module____qualname____doc__r   r;   no_gradrM   __classcell__r   r   r'   r   r   .   s
    
r   c                       s   e Zd ZdZ fddZdddZdddZe d	d
 Z	dd Z
dd ZdddZdd Zdd ZdddZdd Zedd Zdd Z  ZS )
RTDetrLossah  
    This class computes the losses for RTDetr. The process happens in two steps: 1) we compute hungarian assignment
    between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth /
    prediction (supervise class and box).

    Args:
        matcher (`DetrHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        weight_dict (`Dict`):
            Dictionary relating each loss with its weights. These losses are configured in RTDetrConf as
            `weight_loss_vfl`, `weight_loss_bbox`, `weight_loss_giou`
        losses (`List[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
        alpha (`float`):
            Parameter alpha used to compute the focal loss.
        gamma (`float`):
            Parameter gamma used to compute the focal loss.
        eos_coef (`float`):
            Relative classification weight applied to the no-object category.
        num_classes (`int`):
            Number of object categories, omitting the special no-object category.
    c                    s|   t    t|| _|j| _|j|j|jd| _	ddg| _
|j| _t|jd }| j|d< | d| |j| _|j| _d S )N)loss_vfl	loss_bbox	loss_giouvflr0   r   r1   empty_weight)r   r   r   matcherZ
num_labelsnum_classesZweight_loss_vflZweight_loss_bboxZweight_loss_giouweight_dictlossesZeos_coefficientZeos_coefr;   ZonesZregister_bufferZfocal_loss_alphar"   Zfocal_loss_gammar#   )r%   r&   rY   r'   r   r   r      s   



zRTDetrLoss.__init__Tc                 C   st  d|vrt dd|vrt d| |}|d | }tjdd t||D dd}tt| t|\}	}
t|	}	|d }td	d t||D }tj	|j
d d
 | jtj|jd}|||< tj|| jd ddd df }tj||jd}|	|j||< |d| }t| }| j|| j d|  | }tj|||dd}|d |j
d  | }d|iS )Nr   #No predicted boxes found in outputsr   z$No predicted logits found in outputsc                 S       g | ]\}\}}|d  | qS r/   r   r   _target_r7   r   r   r   r           z.RTDetrLoss.loss_labels_vfl.<locals>.<listcomp>r   dimc                 S   r_   r*   r   r`   r   r   r   r      rc   r   r:   devicer   r[   .r1   r9   none)weight	reductionrU   )KeyError_get_source_permutation_idxr;   rA   r   r   r   detachdiagfullr?   r[   r=   rg   rB   one_hotZ
zeros_liker:   toZ	unsqueezerC   r"   powr#    binary_cross_entropy_with_logitsmeansum)r%   rI   rJ   rL   	num_boxesrD   idx	src_boxestarget_boxesZiousrb   
src_logitstarget_classes_originaltarget_classestargetZtarget_score_originalZtarget_scoreZ
pred_scorerj   lossr   r   r   loss_labels_vfl   s0   

 zRTDetrLoss.loss_labels_vflc                 C   s   d|vrt d|d }| |}tdd t||D }tj|jdd | jtj|j	d}	||	|< t
|dd|	| j}
d	|
i}|S )
zClassification loss (NLL)
        targets dicts must contain the key "class_labels" containing a tensor of dim [nb_target_boxes]
        r   z#No logits were found in the outputsc                 S   r_   r*   r   r`   r   r   r   r      rc   z*RTDetrLoss.loss_labels.<locals>.<listcomp>Nr   rf   r   loss_ce)rl   rm   r;   rA   r   rp   r?   r[   r=   rg   rB   Zcross_entropy	transposeZclass_weight)r%   rI   rJ   rL   rw   rD   r{   rx   r|   r}   r   r]   r   r   r   loss_labels   s   
zRTDetrLoss.loss_labelsc                 C   sf   |d }|j }tjdd |D |d}|d|jd d kd}tj|	 |	 }	d|	i}
|
S )z
        Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. This is not
        really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
        r   c                 S   r3   r*   r4   r,   r   r   r   r      r6   z/RTDetrLoss.loss_cardinality.<locals>.<listcomp>)rg   r1   r   Zcardinality_error)
rg   r;   r<   Zargmaxr?   rv   nn
functionall1_lossfloat)r%   rI   rJ   rL   rw   r   rg   Ztarget_lengthsZ	card_predZcard_errr]   r   r   r   loss_cardinality   s   zRTDetrLoss.loss_cardinalityc                 C   s   d|vrt d| |}|d | }tjdd t||D dd}i }tj||dd}	|	 | |d	< d
tt	t
|t
| }
|
 | |d< |S )a;  
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss. Targets dicts must
        contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes are expected in
        format (center_x, center_y, w, h), normalized by the image size.
        r   r^   c                 S   r_   r/   r   )r   trb   r7   r   r   r   r      rc   z)RTDetrLoss.loss_boxes.<locals>.<listcomp>r   rd   ri   rk   rV   r   rW   )rl   rm   r;   rA   r   rB   r   rv   ro   r	   r   )r%   rI   rJ   rL   rw   rx   ry   rz   r]   rV   rW   r   r   r   
loss_boxes   s   
zRTDetrLoss.loss_boxesc                 C   s   d|vrt d| |}| |}|d }|| }dd |D }t| \}	}
|	|}	|	| }	tjj|dddf |	j	dd ddd	}|ddd
f 
d}|	
d}	|	|j	}	t||	|t||	|d}|S )z
        Compute the losses related to the masks: the focal loss and the dice loss. Targets dicts must contain the key
        "masks" containing a tensor of dim [nb_target_boxes, h, w].
        Z
pred_masksz#No predicted masks found in outputsc                 S   r)   )masksr   r   r   r   r   r   r     r.   z)RTDetrLoss.loss_masks.<locals>.<listcomp>NZbilinearF)sizemodeZalign_cornersr   r   )Z	loss_maskZ	loss_dice)rl   rm   _get_target_permutation_idxr
   Z	decomposerr   r   r   Zinterpolater?   r@   rE   r   r   )r%   rI   rJ   rL   rw   
source_idx
target_idxZsource_masksr   Ztarget_masksZvalidr]   r   r   r   
loss_masks  s(   





zRTDetrLoss.loss_masksc                 C   s   |d }|  |}tdd t||D }tj|jd d | jtj|jd}	||	|< t	j
|	| jd ddd d	f }
t	j||
d
 dd}|d |jd  | }d|iS )Nr   c                 S   r_   r*   r   r`   r   r   r   r   $  rc   z.RTDetrLoss.loss_labels_bce.<locals>.<listcomp>r   rf   r   rh   .r1   g      ?ri   r   Zloss_bce)rm   r;   rA   r   rp   r?   r[   r=   rg   rB   rq   rt   ru   rv   r%   rI   rJ   rL   rw   rD   r{   rx   r|   r}   r~   r   r   r   r   loss_labels_bce!  s   
 zRTDetrLoss.loss_labels_bcec                 C   4   t dd t|D }t dd |D }||fS )Nc                 S   s    g | ]\}\}}t ||qS r   r;   Z	full_like)r   r7   sourcerb   r   r   r   r   1  rc   z:RTDetrLoss._get_source_permutation_idx.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   )r   r   rb   r   r   r   r   2  r.   r;   rA   rG   )r%   rL   	batch_idxr   r   r   r   rm   /     z&RTDetrLoss._get_source_permutation_idxc                 C   r   )Nc                 S   s    g | ]\}\}}t ||qS r   r   )r   r7   rb   r~   r   r   r   r   7  rc   z:RTDetrLoss._get_target_permutation_idx.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   )r   rb   r~   r   r   r   r   8  r.   r   )r%   rL   r   r   r   r   r   r   5  r   z&RTDetrLoss._get_target_permutation_idxc                 C   s   d|vrt d|d }| |}tdd t||D }tj|jd d | jtj|j	d}	||	|< t
j|	| jd dd	d d
f }
t||
| j| j}|d |jd  | }d|iS )Nr   zNo logits found in outputsc                 S   r_   r*   r   r`   r   r   r   r   B  rc   z0RTDetrLoss.loss_labels_focal.<locals>.<listcomp>r   rf   r   rh   .r1   Z
loss_focal)rl   rm   r;   rA   r   rp   r?   r[   r=   rg   rB   rq   r   r"   r#   ru   rv   r   r   r   r   loss_labels_focal;  s   
 zRTDetrLoss.loss_labels_focalc                 C   sL   | j | j| j| j| j| j| jd}||vrtd| d|| ||||S )N)labelsZcardinalityr0   r   ZbceZfocalrX   zLoss z not supported)r   r   r   r   r   r   r   r$   )r%   r   rI   rJ   rL   rw   Zloss_mapr   r   r   get_lossM  s   	zRTDetrLoss.get_lossc           
   	   C   s   | d | d }}dd |D }|d d j }g }t|D ]A\}}|dkrItj|tj|d}	|	|}	t|| t|	ks?J ||| |	f q|tjdtj|dtjdtj|df q|S )Ndn_positive_idxdn_num_groupc                 S   r3   r*   r4   r   r   r   r   r   ^  r6   z6RTDetrLoss.get_cdn_matched_indices.<locals>.<listcomp>r   r+   rf   )	rg   rG   r;   Zaranger=   Ztiler5   appendZzeros)
Zdn_metarJ   r   r   Znum_gtsrg   Zdn_match_indicesr7   Znum_gtZgt_idxr   r   r   get_cdn_matched_indices[  s    
z"RTDetrLoss.get_cdn_matched_indicesc           
   	      s  dd |  D }||}tdd |D }tj|gtjtt| j	d}tj
|dd }i }jD ]}|||||fddD | q7d	|v rt|d	 D ]8\ }||}jD ]*}|d
krpqi|||||fddD  fdd  D | qiq\d|v rd|vrtd|d |}||d d  }t|d D ]8\ }jD ]0}|d
krqi }	j|||||fi |	fddD  fdd  D | qq|S )a  
        This performs the loss computation.

        Args:
             outputs (`dict`, *optional*):
                Dictionary of tensors, see the output specification of the model for the format.
             targets (`List[dict]`, *optional*):
                List of dicts, such that `len(targets) == batch_size`. The expected keys in each dict depends on the
                losses applied, see each loss' doc.
        c                 S   s   i | ]\}}d |vr||qS )auxiliary_outputsr   r   kr-   r   r   r   
<dictcomp>}  s    z&RTDetrLoss.forward.<locals>.<dictcomp>c                 s   s    | ]	}t |d  V  qdS )r+   Nr4   r   r   r   r   	<genexpr>  s    z%RTDetrLoss.forward.<locals>.<genexpr>rf   r   )minc                    *   i | ]}|j v r| | j |  qS r   r\   r   r   l_dictr%   r   r   r        * r   r   c                    r   r   r   r   r   r   r   r     r   c                        i | ]\}}|d    |qS )Z_aux_r   r   r7   r   r   r     rc   dn_auxiliary_outputsdenoising_meta_valuesz}The output must have the 'denoising_meta_values` key. Please, ensure that 'outputs' includes a 'denoising_meta_values' entry.r   c                    r   r   r   r   r   r   r   r     r   c                    r   )Z_dn_r   r   r   r   r   r     rc   )itemsrZ   rv   r;   r<   r   nextitervaluesrg   clampitemr]   r   updaterG   r$   r   )
r%   rI   rJ   Zoutputs_without_auxrL   rw   r]   r   r   kwargsr   )r7   r   r%   r   rM   r  sN   "




zRTDetrLoss.forward)T)rN   rO   rP   rQ   r   r   r   r;   rR   r   r   r   r   rm   r   r   r   staticmethodr   rM   rS   r   r   r'   r   rT      s"    





rT   c
                 K   s  t |}|| i }| |d< ||d< |jrv|	d ur2tj||	d dd\}}tj||	d dd\}}t|d d d df dd|d d d df dd}||d	< |d	 t|g|g |	d urvt|dd|dd|d
< |	|d< |||}t|	 }|||fS )Nr   r   Zdn_num_splitr   rd   r1   r   r   r   r   r   )
rT   rr   Zauxiliary_lossr;   rH   r   r   extendrv   r   )r   r   rg   r   r&   r   r   Zenc_topk_logitsZenc_topk_bboxesr   r   	criterionZoutputs_lossZdn_out_coordZdn_out_classr   Z	loss_dictr   r   r   r   RTDetrForObjectDetectionLoss  s(   
:

r   )NNNNN)r;   Ztorch.nnr   Ztorch.nn.functionalr   rB   utilsr   r   r   Zloss_for_object_detectionr   r   r	   r
   r   Zscipy.optimizer   Ztransformers.image_transformsr   ZjitZunusedr   Moduler   rT   r   r   r   r   r   <module>   s*   	
Q  <