o
    Zh+                     @   s   d dl Z d dlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
mZ e r+d dlmZ 		dd	e jd
e jdededef
ddZG dd deZG dd de	Z				dddZdS )    N   )center_to_corners_format)is_scipy_available   )HungarianMatcher	ImageLoss_set_aux_lossgeneralized_box_ioulinear_sum_assignment      ?inputstargets	num_boxesalphagammac           
      C   sv   |   }tjj| |dd}|| d| d|   }|d| |  }|dkr5|| d| d|   }	|	| }| | S )aJ  
    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.

    Args:
        inputs (`torch.FloatTensor` of arbitrary shape):
            The predictions for each example.
        targets (`torch.FloatTensor` with the same shape as `inputs`)
            A tensor storing the binary classification label for each element in the `inputs` (0 for the negative class
            and 1 for the positive class).
        num_boxes (`int`):
            The total number of boxes in the batch.
        alpha (`float`, *optional*, defaults to 0.25):
            Optional weighting factor in the range (0,1) to balance positive vs. negative examples.
        gamma (`int`, *optional*, defaults to 2):
            Exponent of the modulating factor (1 - p_t) to balance easy vs hard examples.

    Returns:
        Loss tensor
    none)Z	reductionr   r   )sigmoidnnZ
functionalZ binary_cross_entropy_with_logitssum)
r   r   r   r   r   ZprobZce_lossZp_tlossZalpha_t r   T/var/www/auris/lib/python3.10/site-packages/transformers/loss/loss_grounding_dino.pysigmoid_focal_loss   s   r   c                   @   s   e Zd Ze dd ZdS )GroundingDinoHungarianMatcherc                 C   sd  |d j dd \}}|d dd }|d dd}|d }tdd	 t||D }||jd
dd }tdd	 |D }d}	d}
d|	 ||
  d| d    }|	d| |
  |d    }|| |  }tj	||dd}t
t|t| }| j| | j|  | j|  }|||d
 }dd	 |D }dd	 t||d
D }dd	 |D S )a  
        Args:
            outputs (`dict`):
                A dictionary that contains at least these entries:
                * "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                * "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates.
                * "label_maps": Tuple of tensors of dim [num_classes, hidden_dim].
            targets (`List[dict]`):
                A list of targets (len(targets) = batch_size), where each target is a dict containing:
                * "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of
                  ground-truth
                 objects in the target) containing the class labels
                * "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates.

        Returns:
            `List[Tuple]`: A list of size `batch_size`, containing tuples of (index_i, index_j) where:
            - index_i is the indices of the selected predictions (in order)
            - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds: len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        logitsNr   r   r   
pred_boxes
label_mapsc                 S   s   g | ]
\}}||d   qS )class_labelsr   ).0Z	label_maptargetr   r   r   
<listcomp>b       z9GroundingDinoHungarianMatcher.forward.<locals>.<listcomp>T)dimZkeepdimc                 S   s   g | ]}|d  qS boxesr   r   vr   r   r   r!   g   s    r          @g:0yE>)pc                 S   s   g | ]}t |d  qS r%   lenr'   r   r   r   r!   {   s    c                 S   s   g | ]
\}}t || qS r   r
   )r   icr   r   r   r!   |   r"   c                 S   s0   g | ]\}}t j|t jd t j|t jd fqS ))dtype)torchZ	as_tensorZint64)r   r-   jr   r   r   r!   }   s   0 )shapeflattenr   r0   catzipr   logtZcdistr	   r   	bbox_cost
class_cost	giou_costviewcpu	enumeratesplit)selfoutputsr   Z
batch_sizeZnum_queriesZout_probZout_bboxr   Ztarget_bboxr   r   Zneg_cost_classZpos_cost_classr9   r8   r:   Zcost_matrixsizesindicesr   r   r   forwardD   s&   "z%GroundingDinoHungarianMatcher.forwardN)__name__
__module____qualname__r0   Zno_gradrC   r   r   r   r   r   C   s    r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	GroundingDinoImageLossa  
    This class computes the losses for `GroundingDinoForObjectDetection`. The process happens in two steps: 1) we
    compute hungarian assignment between ground truth boxes and the outputs of the model 2) we supervise each pair of
    matched ground-truth / prediction (supervise class and box).

    Args:
        matcher (`GroundingDinoHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        focal_alpha (`float`):
            Alpha parameter in focal loss.
        losses (`List[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
    c                 C   s"   t j|  || _|| _|| _d S N)r   Module__init__matcherfocal_alphalosses)r?   rK   rL   rM   r   r   r   rJ      s   
zGroundingDinoImageLoss.__init__c           	         sr    d }t  fddtt||D }t j d dd}| |}t j||jt jd}|| t j||< |S )z>
        Create one_hot based on the matching indices
        r   c                    sH   g | ] \}\}\}}|d kr|d | t  d |  n|d | qS )r   r   r   r+   )r   r-   r    _Jr@   r   r   r!      s    .zFGroundingDinoImageLoss._get_target_classes_one_hot.<locals>.<listcomp>r   r   )r$   )devicer/   )	r0   r4   r=   r5   Z_get_source_permutation_idxZ
zeros_likerQ   longto)	r?   r@   r   rB   r   r   r   idxtarget_classes_onehotr   rP   r   _get_target_classes_one_hot   s   

z2GroundingDinoImageLoss._get_target_classes_one_hotc           
      C   s~   d|vrt dd|vrt d| |||}|d }|d }t||}t||}| }t|||| jdd}d|i}	|	S )z
        Classification loss (Binary focal loss) targets dicts must contain the key "class_labels" containing a tensor
        of dim [nb_target_boxes]
        r   z#No logits were found in the outputs	text_maskz&No text_mask were found in the outputsr   )r   r   r   r   r   loss_ce)KeyErrorrV   r0   Zmasked_selectfloatr   rL   )
r?   r@   r   rB   r   rU   Zsource_logitsrW   rX   rM   r   r   r   loss_labels   s&   z"GroundingDinoImageLoss.loss_labelsN)rD   rE   rF   __doc__rJ   rV   r[   r   r   r   r   rG      s
    rG   c                    sp  t |j|j|jd}g d}t||j|d}|| i }| |d< ||d< ||d< ||d< d }|jrHt||}|D ]
}||d< ||d< q9||d< ||||j	rj|	|
||d	}|||}d
d |
 D }| d|j|jd|j	rdd 
 D }| |jri }t|jd D ] | fdd
 D  q| tfdd D }||fS )N)r9   r8   r:   )labelsr&   Zcardinality)rK   rL   rM   r   r   r   rW   auxiliary_outputs)r   r   r   rW   c                 S      i | ]	\}}|d  |qS Z_encr   r   kr(   r   r   r   
<dictcomp>       z7GroundingDinoForObjectDetectionLoss.<locals>.<dictcomp>r)   )rX   Z	loss_bboxZ	loss_giouc                 S   r_   r`   r   ra   r   r   r   rc     rd   r   c                    s    i | ]\}}|d    |qS )rN   r   ra   )r-   r   r   rc     s     c                 3   s(    | ]}|v r | |  V  qd S rH   r   )r   rb   )	loss_dictweight_dictr   r   	<genexpr>  s   & z6GroundingDinoForObjectDetectionLoss.<locals>.<genexpr>)r   r9   r8   r:   rG   rL   rS   Zauxiliary_lossr   Z	two_stageitemsupdateZbbox_loss_coefficientZgiou_loss_coefficientrangeZdecoder_layersr   keys)r   r]   rQ   r   configr   rW   Zoutputs_classZoutputs_coordZencoder_logitsZencoder_pred_boxesrK   rM   	criterionZoutputs_lossr^   Z
aux_outputZencoder_outputs_lossZencoder_loss_dictZenc_weight_dictZaux_weight_dictr   r   )r-   re   rf   r   #GroundingDinoForObjectDetectionLoss   s\   








rn   )r   r   )NNNN)r0   Ztorch.nnr   Zimage_transformsr   utilsr   Zloss_for_object_detectionr   r   r   r	   Zscipy.optimizer   ZTensorintrZ   r   r   rG   rn   r   r   r   r   <module>   s6   	
'=Q