o
    Zh1                     @   s   d Z ddlmZ ddlmZmZmZ ddlZddl	m
Z
 ddlmZmZmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZ e rGddlZG dd deZG dd deddZG dd deZdgZdS )z
Processor class for SAMHQ.
    )deepcopy)ListOptionalUnionN   )
ImageInput)ImagesKwargsProcessingKwargsProcessorMixinUnpack)
AudioInputBatchEncodingPreTokenizedInput	TextInput)is_torch_available)
VideoInputc                   @   sf   e Zd ZU ee ed< eeee   ed< eeee   ed< eeeee    ed< ee ed< dS )SamHQImagesKwargssegmentation_mapsinput_pointsinput_labelsinput_boxespoint_pad_valueN)	__name__
__module____qualname__r   r   __annotations__r   floatint r   r   Z/var/www/auris/lib/python3.10/site-packages/transformers/models/sam_hq/processing_samhq.pyr   #   s   
 r   c                   @   s"   e Zd ZU eed< dddiiZdS )SamHQProcessorKwargsimages_kwargsr   N)r   r   r   r   r   	_defaultsr   r   r   r   r    +   s   
 r    F)totalc                       s  e Zd ZdZdgZdZg dZ fddZ	d)ddddd	ee	 d
ee
eeee ee f  dee dee dee defddZ					d*ddZdd Z	d+dedejdejfddZd,ddZ			d-dd Zed!d" Zd#d$ Zd%d& Zd+d'd(Z   Z!S ).SamHQProcessora  
    Constructs a SAM HQ processor which wraps a SAM  image processor and an 2D points & Bounding boxes processor into a
    single processor.

    [`SamHQProcessor`] offers all the functionalities of [`SamImageProcessor`]. See the docstring of
    [`~SamImageProcessor.__call__`] for more information.

    Args:
        image_processor (`SamImageProcessor`):
            An instance of [`SamImageProcessor`]. The image processor is a required input.
    image_processorZSamImageProcessor)r   r   r   r   c                    sD   t  | t| dstdt| jdstd| jjd | _d S )Nr%   z,image_processor was not properly initializedsizezimage_processor.size is not setlongest_edge)super__init__hasattr
ValueErrorr%   r&   target_size)selfr%   	__class__r   r   r)   K   s   
zSamHQProcessor.__init__N)textaudiovideoimagesr0   r1   r2   kwargsreturnc             
   O   s   | j tfdi i|| j| }|d dd}|d dd}	|d dd}
| j|fi |d }|d }t|dr@| }| j||	|
d	\}}	}
| j||||	|
|d
 	d|d 	dd}|S )z
        This method uses [`SamImageProcessor.__call__`] method to prepare image(s) for the model. It also prepares 2D
        points and bounding boxes for the model if they are provided.
        Ztokenizer_init_kwargsr!   r   Nr   r   original_sizesnumpy)r   r   r   Zcommon_kwargsreturn_tensorsr   )r   r   r   r8   r   )
Z_merge_kwargsr    Z'prepare_and_validate_optional_call_argspopr%   r*   r7   _check_and_preprocess_points_normalize_and_convertget)r-   r3   r0   r1   r2   argsr4   Zoutput_kwargsr   r   r   encoding_image_processorr6   r   r   r   __call__T   sF   

zSamHQProcessor.__call__ptc                    s    dur'|   | t fdd D s"|dur"|  ||\ }t  |dur0t|}|durA| j ||dd}t|}|durN| |d||d<  dur[|  d||d	< |durh| |d||d
< |S )zZ
        Normalize and convert the image processor output to the expected format.
        Nc                 3   s     | ]}|j  d  j kV  qdS )r   Nshape.0pointr   r   r   	<genexpr>   s    z8SamHQProcessor._normalize_and_convert.<locals>.<genexpr>Tis_bounding_boxr   r      r   r   )_normalize_batch_coordinatesall_pad_points_and_labelsnparray
_to_tensor)r-   r>   r6   r   r   r   r8   r   r   rG   r   r;      s(   


z%SamHQProcessor._normalize_and_convertc                 C   s   t dd |D }g }t|D ]0\}}|jd |kr:tj|t||jd  df| gdd}t|| |g||< || q|}||fS )zh
        The method pads the 2D points and labels to the maximum number of points in the batch.
        c                 S   s   g | ]}|j d  qS )r   rB   rD   r   r   r   
<listcomp>   s    z9SamHQProcessor._pad_points_and_labels.<locals>.<listcomp>r      )Zaxis)max	enumeraterC   rO   ZconcatenateZzerosappend)r-   r   r   r   Zexpected_nb_pointsZprocessed_input_pointsirF   r   r   r   rN      s    z%SamHQProcessor._pad_points_and_labelsFr,   coordsc           	      C   sx   |\}}| j j||d\}}t|t}|r|ddd}|d ||  |d< |d ||  |d< |r:|dd}|S )z}
        Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H,W) format.
        )r'   rS   ).r   ).   rK   )r%   Z_get_preprocess_shaper   Zastyper   Zreshape)	r-   r,   rX   Zoriginal_sizerJ   Zold_hZold_wZnew_hZnew_wr   r   r   _normalize_coordinates   s   z%SamHQProcessor._normalize_coordinatesrZ   c                    s   |du rdS t |dr|  }t|t}|}t|D ]}|r"|s$ n|o,t|d t}|r3|d nd}q|s<t| fdd|D S )a  
        Preprocess input by converting torch tensors to numpy arrays and validating structure.

        Args:
            inputs: The input to process
            error_message: Error message if validation fails
            expected_nesting: Expected nesting level (1 for points/labels, 2 for boxes)
            dtype: Optional data type for numpy array conversion

        Returns:
            Processed input as list of numpy arrays or None
        Nr7   r   c                    s   g | ]	}t j| d qS )dtype)rO   rP   rE   itemr\   r   r   rR     s    z4SamHQProcessor._preprocess_input.<locals>.<listcomp>)r*   r7   tolist
isinstancelistranger+   )r-   inputserror_messageexpected_nestingr]   Zvalidcurrent_r   r\   r   _preprocess_input   s   

z SamHQProcessor._preprocess_inputc                 C   s6   |  |d}|  |d}| j |ddtjd}|||fS )a8  
        Check and preprocesses the 2D points, labels and bounding boxes. It checks if the input is valid and if they
        are, it converts the coordinates of the points and bounding boxes. If a user passes directly a `torch.Tensor`,
        it is converted to a `numpy.ndarray` and then to a `list`.
        z7Input points must be a list of list of floating points.z-Input labels must be a list of list integers.z>Input boxes must be a list of list of list of floating points.rS   )rf   r]   )ri   rO   Zfloat32)r-   r   r   r   r   r   r   r:     s   
z+SamHQProcessor._check_and_preprocess_pointsc                 C   s   | j j}tt|S N)r%   model_input_namesrb   dictfromkeys)r-   Zimage_processor_input_namesr   r   r   rk     s   z SamHQProcessor.model_input_namesc                 O   s   | j j|i |S rj   )r%   post_process_masks)r-   r=   r4   r   r   r   rn   !  s   z!SamHQProcessor.post_process_masksc                 C   s.   |dkrt |}|j|k r|dS |S |S )a  
        Convert numpy array to tensor and ensure proper dimensionality.
        Args:
            array: The numpy array to convert
            min_dim: The minimum number of dimensions the result should have
            return_tensors: The type of tensors to return (e.g., "pt" for PyTorch tensors)
        Returns:
            The converted array or tensor with proper dimensions
        r@   rZ   )torchZ
from_numpyndimZ	unsqueeze)r-   rP   Zmin_dimr8   r   r   r   rQ   $  s   

zSamHQProcessor._to_tensorc                    s@   t t |kr fdd|D S  fddt|D S )a0  
        Normalize coordinates based on original sizes.
        Args:
            inputs: List of coordinate arrays
            original_sizes: Original sizes of the images
            is_bounding_box: Whether inputs are bounding boxes
        Returns:
            Normalized coordinates as list
        c                    s$   g | ]}j j|d   dqS )r   rI   r[   r,   r^   rJ   r6   r-   r   r   rR   ?  s    z?SamHQProcessor._normalize_batch_coordinates.<locals>.<listcomp>c                    s$   g | ]\}}j j|| d qS )rI   rq   )rE   r_   r&   )rJ   r-   r   r   rR   E  s    )lenzip)r-   rd   r6   rJ   r   rr   r   rL   3  s   
z+SamHQProcessor._normalize_batch_coordinatesrj   )NNNr@   rA   )F)rZ   N)NNN)"r   r   r   __doc__
attributesZimage_processor_classZoptional_call_argsr)   r   r   r   r   r   r   r   r   r   r    r   r?   r;   rN   r   rO   Zndarrayr[   ri   r:   propertyrk   rn   rQ   rL   __classcell__r   r   r.   r   r$   4   sb    


>
,

&

r$   )ru   copyr   typingr   r   r   r7   rO   Zimage_utilsr   Zprocessing_utilsr   r	   r
   r   Ztokenization_utils_baser   r   r   r   utilsr   Zvideo_utilsr   ro   r   r    r$   __all__r   r   r   r   <module>   s"   	  
