o
    Zh                     @   s   d dl mZ d dlmZmZmZ d dlZd dlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZ d	d
lmZ ddlmZ eeZeG dd deZeG dd deZdgZdS )    )	dataclass)ListOptionalUnionN   )Cache)$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )AutoModelForImageTextToText   )ShieldGemma2Configc                   @   s$   e Zd ZU dZdZeej ed< dS )0ShieldGemma2ImageClassifierOutputWithNoAttentionz^ShieldGemma2 classifies imags as violative or not relative to a specific policy
    Args:
    Nprobabilities)	__name__
__module____qualname____doc__r   r   torchTensor__annotations__ r   r   e/var/www/auris/lib/python3.10/site-packages/transformers/models/shieldgemma2/modeling_shieldgemma2.pyr   $   s   
 r   c                        s"  e Zd ZeZdef fddZdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Ze														d%deej deej deej deej deeeej ef  deej deej deej deej dee dee dee d ee d!eeejf d"efd#d$Z  ZS )&"ShieldGemma2ForImageClassificationconfigc                    s<   t  j|d t|dd| _t|dd| _tj|d| _d S )N)r   yes_token_indexi *  no_token_indexi  )super__init__getattrr   r   r   Zfrom_configmodel)selfr   	__class__r   r   r    1   s   z+ShieldGemma2ForImageClassification.__init__c                 C      | j j S N)r"   language_modelget_input_embeddingsr#   r   r   r   r)   7      z7ShieldGemma2ForImageClassification.get_input_embeddingsc                 C      | j j| d S r'   )r"   r(   set_input_embeddings)r#   valuer   r   r   r-   :      z7ShieldGemma2ForImageClassification.set_input_embeddingsc                 C   r&   r'   )r"   r(   get_output_embeddingsr*   r   r   r   r0   =   r+   z8ShieldGemma2ForImageClassification.get_output_embeddingsc                 C   r,   r'   )r"   r(   set_output_embeddings)r#   Znew_embeddingsr   r   r   r1   @   r/   z8ShieldGemma2ForImageClassification.set_output_embeddingsc                 C   r,   r'   )r"   r(   set_decoder)r#   decoderr   r   r   r2   C   r/   z.ShieldGemma2ForImageClassification.set_decoderc                 C   r&   r'   )r"   r(   get_decoderr*   r   r   r   r4   F   r+   z.ShieldGemma2ForImageClassification.get_decoderc                 C   r&   r'   )r"   r(   tie_weightsr*   r   r   r   r5   I   r+   z.ShieldGemma2ForImageClassification.tie_weightsNr   	input_idspixel_valuesattention_maskposition_idspast_key_valuestoken_type_idscache_positioninputs_embedslabels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictlogits_to_keepreturnc                 K   sh   | j d|||||||||	|
||||d|}|j}|ddd| j| jgf }tj|dd}t||dS )aY  
        Returns:
            A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
            associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
            following properties.

                *   `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
                    the logits for the `No` token.
                *   `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the probability of predicting the `Yes` token and the second position
                    along dim=1 is the probability of predicting the `No` token.

            ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
            policy as described. If you are only interested in the violative condition, use
            `violated = outputs.probabilities[:, 1]` to extract that slice from the output tensors.

            When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
            and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
        )r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   N)dim)logitsr   r   )r"   rG   r   r   r   Zsoftmaxr   )r#   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   Z	lm_kwargsZoutputsrG   Zselected_logitsr   r   r   r   forwardL   s2   'z*ShieldGemma2ForImageClassification.forward)NNNNNNNNNNNNNr   )r   r   r   r   Zconfig_classr    r)   r-   r0   r1   r2   r4   r5   r
   r   r   Z
LongTensorZFloatTensorr   r   r   r   boolintr   rH   __classcell__r   r   r$   r   r   -   sp    	
r   )dataclassesr   typingr   r   r   r   Ztorch.utils.checkpointZcache_utilsr   Zmodeling_outputsr   Zmodeling_utilsr	   utilsr
   r   autor   Zconfiguration_shieldgemma2r   Z
get_loggerr   loggerr   r   __all__r   r   r   r   <module>   s"   
`