o
    Zh2                     @   s   d Z ddlmZ ddlmZmZmZ ddlZddl	m
Z
 ddlmZmZmZ ddlmZmZmZmZ dd	lmZmZ dd
lmZ e rGddlZe rNddlZG dd deZG dd deddZG dd deZdgZdS )z
Processor class for SAM.
    )deepcopy)ListOptionalUnionN   )
ImageInput)ImagesKwargsProcessingKwargsProcessorMixin)
AudioInputBatchEncodingPreTokenizedInput	TextInput)is_tf_availableis_torch_available)
VideoInputc                   @   sf   e Zd ZU ee ed< eeee   ed< eeee   ed< eeeee    ed< ee ed< dS )SamImagesKwargssegmentation_mapsinput_pointsinput_labelsinput_boxespoint_pad_valueN)	__name__
__module____qualname__r   r   __annotations__r   floatint r   r   U/var/www/auris/lib/python3.10/site-packages/transformers/models/sam/processing_sam.pyr   &   s   
 r   c                   @   s"   e Zd ZU eed< dddiiZdS )SamProcessorKwargsimages_kwargsr   N)r   r   r   r   r   	_defaultsr   r   r   r   r    .   s   
 r    F)totalc                       s   e Zd ZdZdgZdZg dZ fddZ	d!ddddd	ee	 d
ee
eeee ee f  dee dee def
ddZ					d"ddZdd Z	d#dedejdejfddZ			d$ddZedd Zdd  Z  ZS )%SamProcessora  
    Constructs a SAM processor which wraps a SAM image processor and an 2D points & Bounding boxes processor into a
    single processor.

    [`SamProcessor`] offers all the functionalities of [`SamImageProcessor`]. See the docstring of
    [`~SamImageProcessor.__call__`] for more information.

    Args:
        image_processor (`SamImageProcessor`):
            An instance of [`SamImageProcessor`]. The image processor is a required input.
    image_processorZSamImageProcessor)r   r   r   r   c                    s   t  | | jjd | _d S )Nlongest_edge)super__init__r&   sizetarget_size)selfr&   	__class__r   r   r)   N   s   zSamProcessor.__init__N)textaudiovideoimagesr/   r0   r1   returnc             	   O   s   | j tfdi i|| j| }|d dd}|d dd}	|d dd}
|d dd}| j|fi |d }|d }t|d	rH| }| j||	|
d
\}}	}
| j||||	|
|d 	d|d}|S )z
        This method uses [`SamImageProcessor.__call__`] method to prepare image(s) for the model. It also prepares 2D
        points and bounding boxes for the model if they are provided.
        Ztokenizer_init_kwargsr!   r   Nr   r   r   original_sizesnumpy)r   r   r   Zcommon_kwargsreturn_tensors)r   r   r   r6   r   )
Z_merge_kwargsr    Z'prepare_and_validate_optional_call_argspopr&   hasattrr5   _check_and_preprocess_points_normalize_and_convertget)r,   r2   r/   r0   r1   argskwargsZoutput_kwargsr   r   r   r   encoding_image_processorr4   r   r   r   __call__R   sH   

zSamProcessor.__call__ptr"   c                    sR   d ur@t t  krfdd D  nfddt D  t fdd D s;|d ur; ||\ }t  |d urIt|}|d urqt t |kr`fdd|D }nfddt|D }t|}|d ur|dkrt|}t |jd	kr|	d
n|}n|dkrt
|}t |jd	krt
|d
n|}|d|i  d ur|dkrt  t  jdkrň 	d
n  n|dkrt
  t  jdkrt
 d
n  |d i |d ur'|dkrt|}t |jd	kr|	d
n|}n|dkr t
|}t |jd	krt
|d
n|}|d|i |S )Nc                    s    g | ]} j| d  qS r   _normalize_coordinatesr+   .0pointr4   r,   r   r   
<listcomp>   s    z7SamProcessor._normalize_and_convert.<locals>.<listcomp>c                    s    g | ]\}}   j||qS r   rB   )rE   rF   original_sizer,   r   r   rH      s    c                 3   s     | ]}|j  d  j kV  qdS )r   NshaperD   )r   r   r   	<genexpr>   s    z6SamProcessor._normalize_and_convert.<locals>.<genexpr>c                    s$   g | ]}j j| d  ddqS )r   Tis_bounding_boxrB   rE   boxrG   r   r   rH      s    c                    s$   g | ]\}} j  j||d dqS )TrN   rB   )rE   rQ   rI   rJ   r   r   rH      s    r@   r      tfr      r   r   )lenzipall_pad_points_and_labelsnparraytorchZ
from_numpyrL   Z	unsqueezerS   Zconvert_to_tensorZexpand_dimsupdate)r,   r>   r4   r   r   r   r6   r   r   )r   r4   r,   r   r:      sd   












 

 z#SamProcessor._normalize_and_convertc                 C   s   t dd |D }g }t|D ]0\}}|jd |kr:tj|t||jd  df| gdd}t|| |g||< || q|}||fS )zh
        The method pads the 2D points and labels to the maximum number of points in the batch.
        c                 S   s   g | ]}|j d  qS rA   rK   rD   r   r   r   rH          z7SamProcessor._pad_points_and_labels.<locals>.<listcomp>r      )Zaxis)max	enumeraterL   rY   ZconcatenateZzerosappend)r,   r   r   r   Zexpected_nb_pointsZprocessed_input_pointsirF   r   r   r   rX      s    z#SamProcessor._pad_points_and_labelsFr+   coordsc           	      C   sx   |\}}| j j||d\}}t|t}|r|ddd}|d ||  |d< |d ||  |d< |r:|dd}|S )z~
        Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H, W) format.
        )r'   r^   ).r   ).rR   rT   )r&   Z_get_preprocess_shaper   astyper   Zreshape)	r,   r+   rc   rI   rO   Zold_hZold_wZnew_hZnew_wr   r   r   rC      s   z#SamProcessor._normalize_coordinatesc                 C   s  |dur't |dr|  }t|trt|d tstddd |D }nd}|durPt |dr8|  }t|trDt|d tsHtddd |D }nd}|durt |dra|  }t|trvt|d trvt|d d tsztd	d
d |D }nd}|||fS )a8  
        Check and preprocesses the 2D points, labels and bounding boxes. It checks if the input is valid and if they
        are, it converts the coordinates of the points and bounding boxes. If a user passes directly a `torch.Tensor`,
        it is converted to a `numpy.ndarray` and then to a `list`.
        Nr5   r   z7Input points must be a list of list of floating points.c                 S      g | ]}t |qS r   rY   rZ   )rE   Zinput_pointr   r   r   rH     r]   z=SamProcessor._check_and_preprocess_points.<locals>.<listcomp>z-Input labels must be a list of list integers.c                 S   rf   r   rg   )rE   labelr   r   r   rH     r]   z>Input boxes must be a list of list of list of floating points.c                 S   s   g | ]}t |t jqS r   )rY   rZ   re   Zfloat32rP   r   r   r   rH   )  s    )r8   r5   tolist
isinstancelist
ValueError)r,   r   r   r   r   r   r   r9      s6   



z)SamProcessor._check_and_preprocess_pointsc                 C   s   | j j}tt|S N)r&   model_input_namesrk   dictfromkeys)r,   Zimage_processor_input_namesr   r   r   rn   /  s   zSamProcessor.model_input_namesc                 O   s   | j j|i |S rm   )r&   post_process_masks)r,   r<   r=   r   r   r   rq   4  s   zSamProcessor.post_process_masksrm   )NNNr@   r"   )F)NNN)r   r   r   __doc__
attributesZimage_processor_classZoptional_call_argsr)   r   r   r   r   r   r   r   r   r   r?   r:   rX   r   rY   ZndarrayrC   r9   propertyrn   rq   __classcell__r   r   r-   r   r%   7   sX    


?
N

/
r%   ) rr   copyr   typingr   r   r   r5   rY   Zimage_utilsr   Zprocessing_utilsr   r	   r
   Ztokenization_utils_baser   r   r   r   utilsr   r   Zvideo_utilsr   r[   Z
tensorflowrS   r   r    r%   __all__r   r   r   r   <module>   s&   	  
