o
    Zhe                     @   s  d dl mZ d dlmZmZ d dlmZmZmZm	Z	 d dl
ZddlmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZm Z m!Z! dd	l"m#Z# dd
l$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ e* rqddlm,Z, e' rxd dl-Z-e( rddlm.Z. e) rd dl/m0Z1 n	d dl2m0Z1 ndZ.e+3e4Z5edddddddddddddddej6fdee7 dee8 dee7 dee	e8e9e8 f  dee	e8e9e8 f  dee7 dee: dee7 dee dee7 dee ded dee	e;e%f  dee fdd Z<d9d!d"d#ee: d$d"fd%d&Z=d'ee d$e9e fd(d)Z>d*e9d" d$e?e: fd+d,Z@d-e	ejAd"f d.e:d$e9e	ejAd"f  fd/d0ZBG d1d2 d2ed3d4ZCe&G d5d6 d6eZDG d7d8 d8ZEdS ):    )Iterable)	lru_cachepartial)AnyOptional	TypedDictUnionN   )BaseImageProcessorBatchFeatureget_size_dict)convert_to_rgbget_resize_output_image_sizeget_size_with_aspect_ratiogroup_images_by_shapereorder_images)ChannelDimension
ImageInput	ImageTypeSizeDictget_image_size#get_image_size_for_max_height_widthget_image_typeinfer_channel_dimension_formatmake_flat_list_of_imagesvalidate_kwargsvalidate_preprocess_arguments)Unpack)
TensorTypeauto_docstringis_torch_availableis_torchvision_availableis_torchvision_v2_availableis_vision_availablelogging)PILImageResampling)pil_torch_interpolation_mapping)
functional
   maxsize
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padsize_divisibilitydo_center_crop	crop_size	do_resizesizeresampler%   return_tensorsdata_formatc                 C   sN   t | |||||||||	|
|d |dur|dkrtd|tjkr%tddS )z
    Checks validity of typically used arguments in an `ImageProcessorFast` `preprocess` method.
    Raises `ValueError` if arguments incompatibility is caught.
    )r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   Nptz6Only returning PyTorch tensors is currently supported.z6Only channel first data format is currently supported.)r   
ValueErrorr   FIRST)r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8    r<   W/var/www/auris/lib/python3.10/site-packages/transformers/image_processing_utils_fast.py"validate_fast_preprocess_argumentsI   s&   
r>   tensortorch.Tensoraxisreturnc                 C   s6   |du r|   S z| j |dW S  ty   |  Y S w )zF
    Squeezes a tensor, but only if the axis specified has dim 1.
    N)rA   )Zsqueezer:   )r?   rA   r<   r<   r=   safe_squeezet   s   rC   valuesc                 C   s   dd t |  D S )zO
    Return the maximum value across all indices of an iterable of values.
    c                 S   s   g | ]}t |qS r<   )max).0Zvalues_ir<   r<   r=   
<listcomp>       z&max_across_indices.<locals>.<listcomp>)zip)rD   r<   r<   r=   max_across_indices   s   rJ   imagesc                 C   s    t dd | D \}}}||fS )zH
    Get the maximum height and width across all images in a batch.
    c                 S   s   g | ]}|j qS r<   )shape)rF   Zimgr<   r<   r=   rG      s    z(get_max_height_width.<locals>.<listcomp>)rJ   )rK   _
max_height	max_widthr<   r<   r=   get_max_height_width   s   rP   image
patch_sizec                 C   sj   g }t | tjd\}}td||D ]!}td||D ]}| dd||| ||| f }|| qq|S )a6  
    Divides an image into patches of a specified size.

    Args:
        image (`Union[np.array, "torch.Tensor"]`):
            The input image.
        patch_size (`int`):
            The size of each patch.
    Returns:
        list: A list of Union[np.array, "torch.Tensor"] representing the patches.
    )Zchannel_dimr   N)r   r   r;   rangeappend)rQ   rR   Zpatchesheightwidthijpatchr<   r<   r=   divide_to_patches   s   "rZ   c                   @   s  e Zd ZU ee ed< eeeef  ed< ee ed< ee	d  ed< ee ed< eeeef  ed< ee ed< ee	ee
f  ed	< ee ed
< ee	e
ee
 f  ed< ee	e
ee
 f  ed< ee ed< ee	eef  ed< ee ed< ee	eef  ed< ed ed< dS )DefaultFastImageProcessorKwargsr4   r5   default_to_squarer%   F.InterpolationModer6   r2   r3   r+   r,   r-   r.   r/   do_convert_rgbr7   r8   input_data_formattorch.devicedeviceN)__name__
__module____qualname__r   bool__annotations__dictstrintr   floatlistr   r   r<   r<   r<   r=   r[      s"   
 r[   F)totalc                       s  e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZejZdZdZdgZeZdZdee ddf fddZ		dHd	d
dedddedd
f
ddZd	d
dedd
fddZd	d
de ee!e f de ee!e f dd
fddZ"e#dd						dIde$e de$e ee%e f  de$e ee%e f  de$e de$e de$d  de&fd!d"Z'd#d
dededede ee%e f de ee%e f dd
fd$d%Z(d	d
de)e*e+f dd
fd&d'Z,d	e-de-fd(d)Z.de)fd*d+Z/d#e-de-fd,d-Z0			dJd	e-d.e$e d/e$e e*ef  de$d  dd
f
d0d1Z1			dJd#e-d.e$e d/e$e e*ef  de$d  de%d
 f
d2d3Z2						dIde$e d4e$e d5e$e de$e ee%e f  de$e ee%e f  d6e$e de)fd7d8Z3												dKde$e de$e de$e de$e ee&e f  de$e ee&e f  d9e$e de$e d:e$e d4e$e d;e$e d<  d=e$e e*e4f  d6e$e fd>d?Z5d#e-dee de6fd@dAZ7e8d#e-dee de6fdBdCZ9d#e%d
 d9edede$d d:ed4edededede$e ee%e f  de$e ee%e f  d=e$e e*e4f  de6fdDdEZ: fdFdGZ;  Z<S )LBaseImageProcessorFastNTgp?pixel_valueskwargsrB   c                    s   t  jdi | | |}|d| j}|d ur$t||d| jdnd | _|d| j}|d ur8t|ddnd | _| jj	
 D ]}||d }|d urTt| || qAt| |t| |d  qAt| jj	
 | _d S )Nr5   r\   r5   r\   r3   
param_namer<   )super__init__filter_out_unused_kwargspopr5   r   r\   r3   valid_kwargsrg   keyssetattrgetattrrl   _valid_kwargs_names)selfrp   r5   r3   keykwarg	__class__r<   r=   ru      s   
zBaseImageProcessorFast.__init__rQ   r@   r5   interpolationr^   	antialiasc                 K   s   |dur|nt jj}|jr|jrt| dd |j|j}n8|jr-t||jdtj	d}n*|j
rB|jrBt| dd |j
|j}n|jrO|jrO|j|jf}ntd| dt j||||dS )a@  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.

        Returns:
            `torch.Tensor`: The resized image.
        NF)r5   r\   r`   zjSize must contain 'height' and 'width' keys, or 'max_height' and 'max_width', or 'shortest_edge' key. Got .)r   r   )FZInterpolationModeZBILINEARZshortest_edgeZlongest_edger   r5   r   r   r;   rN   rO   r   rU   rV   r:   resize)r}   rQ   r5   r   r   rp   new_sizer<   r<   r=   r      s0   zBaseImageProcessorFast.resizescalec                 K   s   || S )a?  
        Rescale an image by a scale factor. image = image * scale.

        Args:
            image (`torch.Tensor`):
                Image to rescale.
            scale (`float`):
                The scaling factor to rescale pixel values by.

        Returns:
            `torch.Tensor`: The rescaled image.
        r<   )r}   rQ   r   rp   r<   r<   r=   rescale  s   zBaseImageProcessorFast.rescalemeanstdc                 K   s   t |||S )a  
        Normalize an image. image = (image - image_mean) / image_std.

        Args:
            image (`torch.Tensor`):
                Image to normalize.
            mean (`torch.Tensor`, `float` or `Iterable[float]`):
                Image mean to use for normalization.
            std (`torch.Tensor`, `float` or `Iterable[float]`):
                Image standard deviation to use for normalization.

        Returns:
            `torch.Tensor`: The normalized image.
        )r   	normalize)r}   rQ   r   r   rp   r<   r<   r=   r   0  s   z BaseImageProcessorFast.normalizer(   r)   r-   r.   r/   r+   r,   rb   ra   c                 C   sB   |r|rt j||dd|  }t j||dd|  }d}|||fS )N)rb   g      ?F)torchr?   )r}   r-   r.   r/   r+   r,   rb   r<   r<   r=   !_fuse_mean_std_and_rescale_factorG  s
   

z8BaseImageProcessorFast._fuse_mean_std_and_rescale_factorrK   c                 C   sR   | j ||||||jd\}}}|r| |jtjd||}|S |r'| ||}|S )z/
        Rescale and normalize images.
        )r-   r.   r/   r+   r,   rb   )Zdtype)r   rb   r   tor   Zfloat32r   )r}   rK   r+   r,   r-   r.   r/   r<   r<   r=   rescale_and_normalizeX  s   	z,BaseImageProcessorFast.rescale_and_normalizec                 K   s>   |j du s
|jdu rtd|  t||d |d fS )a  
        Center crop an image to `(size["height"], size["width"])`. If the input size is smaller than `crop_size` along
        any edge, the image is padded with 0's and then center cropped.

        Args:
            image (`"torch.Tensor"`):
                Image to center crop.
            size (`Dict[str, int]`):
                Size of the output image.

        Returns:
            `torch.Tensor`: The center cropped image.
        Nz=The size dictionary must have keys 'height' and 'width'. Got rU   rV   )rU   rV   r:   ry   r   center_crop)r}   rQ   r5   rp   r<   r<   r=   r   t  s   z"BaseImageProcessorFast.center_cropc                 C      t |S )a'  
        Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
        as is.
        Args:
            image (ImageInput):
                The image to convert.

        Returns:
            ImageInput: The converted image.
        )r   )r}   rQ   r<   r<   r=   r        z%BaseImageProcessorFast.convert_to_rgbc                 C   sB   | j du r|S | j D ]}||v rtd| d || q
|S )zJ
        Filter out the unused kwargs from the kwargs dictionary.
        Nz!This processor does not use the `z ` parameter. It will be ignored.)unused_kwargsloggerZwarning_oncerw   )r}   rp   
kwarg_namer<   r<   r=   rv     s   


z/BaseImageProcessorFast.filter_out_unused_kwargsc                 C   r   )z
        Prepare the images structure for processing.

        Args:
            images (`ImageInput`):
                The input images to process.

        Returns:
            `ImageInput`: The images with a valid nesting.
        )r   )r}   rK   r<   r<   r=   _prepare_images_structure  r   z0BaseImageProcessorFast._prepare_images_structurer_   r`   c                 C   s   t |}|tjtjtjfvrtd| |r| |}|tjkr't|}n|tjkr3t	
| }|d u r;t|}|tjkrI|ddd }|d urR||}|S )NzUnsupported input image type    r   r	   )r   r   ZPILZTORCHZNUMPYr:   r   r   Zpil_to_tensorr   Z
from_numpy
contiguousr   r   ZLASTZpermuter   )r}   rQ   r_   r`   rb   Z
image_typer<   r<   r=   _process_image  s    




z%BaseImageProcessorFast._process_imagec                 C   s<   |  |}t| j|||d}g }|D ]	}||| q|S )z:
        Prepare the input images for processing.
        )r_   r`   rb   )r   r   r   rT   )r}   rK   r_   r`   rb   Zprocess_image_fnprocessed_imagesrQ   r<   r<   r=   _prepare_input_images  s   

z,BaseImageProcessorFast._prepare_input_imagesr3   r\   r8   c                 K   s   |du ri }|durt d
i t||d}|dur$t d
i t|dd}t|tr-t|}t|tr6t|}|du r=tj}||d< ||d< ||d< ||d< ||d< ||d	< |S )z
        Update kwargs that need further processing before being validated
        Can be overridden by subclasses to customize the processing of kwargs.
        Nrq   r3   rr   r5   r\   r.   r/   r8   r<   )r   r   
isinstancerl   tupler   r;   )r}   r5   r3   r\   r.   r/   r8   rp   r<   r<   r=   _further_process_kwargs  s&   

z.BaseImageProcessorFast._further_process_kwargsr4   r2   r6   r]   r7   c                 K   s$   t |||||||||	|
||d dS )z@
        validate the kwargs for the preprocess method.
        )r+   r,   r-   r.   r/   r4   r5   r2   r3   r6   r7   r8   N)r>   )r}   r+   r,   r-   r.   r/   r4   r5   r2   r3   r6   r7   r8   rp   r<   r<   r=   _validate_preprocess_kwargs  s   
z2BaseImageProcessorFast._validate_preprocess_kwargsc                 O   s   | j |g|R i |S N)
preprocess)r}   rK   argsrp   r<   r<   r=   __call__8  s   zBaseImageProcessorFast.__call__c           	      O   s   t | | jd | jD ]}||t| |d  q|d}|d}|d}| j||||d}| jd
i |}| jd
i | |d}t	|t
tfrQt| n||d< |d |d	 | j|g|R i |S )N)Zcaptured_kwargsZvalid_processor_keysr_   r`   rb   )rK   r_   r`   rb   r6   r   r\   r8   r<   )r   ry   r|   
setdefaultr{   rw   r   r   r   r   rj   r%   r&   _preprocess)	r}   rK   r   rp   r   r_   r`   rb   r6   r<   r<   r=   r   ;  s"   






z!BaseImageProcessorFast.preprocessc              	   K   s   t |\}}i }| D ]\}}|r| j|||d}|||< qt||}t |\}}i }| D ]\}}|r<| ||}| ||||	|
|}|||< q0t||}|rYtj|ddn|}td|i|dS )N)rQ   r5   r   r   dimro   )dataZtensor_type)	r   itemsr   r   r   r   r   stackr   )r}   rK   r4   r5   r   r2   r3   r+   r,   r-   r.   r/   r7   rp   Zgrouped_imagesZgrouped_images_indexZresized_images_groupedrL   Zstacked_imagesZresized_imagesZprocessed_images_groupedr   r<   r<   r=   r   c  s&   



z"BaseImageProcessorFast._preprocessc                    s&   t   }|dd  |dd  |S )NZ_valid_processor_keysr|   )rt   to_dictrw   )r}   Zencoder_dictr   r<   r=   r     s   
zBaseImageProcessorFast.to_dict)NT)NNNNNN)NNN)NNNNNNNNNNNN)=rc   rd   re   r6   r.   r/   r5   r\   r3   r4   r2   r+   r,   r-   r_   r7   r   r;   r8   r`   rb   Zmodel_input_namesr[   rx   r   r   ru   r   rf   r   rk   r   r   r   r   r   r   rl   r   r   r   rh   ri   rj   r   r   r   rv   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r<   r<   r   r=   rn      s   
1







%
	
&	


"'	

+rn   c                   @   s$   e Zd Zddeee  fddZdS )SemanticSegmentationMixinNtarget_sizesc                    s   |j }|dur@t|t|krtdg  tt|D ]"}tjjj|| jdd|| ddd}|d j	dd} 
| q S |j	dd  fd	d
t jd D   S )aD  
        Converts the output of [`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch.

        Args:
            outputs ([`MobileNetV2ForSemanticSegmentation`]):
                Raw outputs of the model.
            target_sizes (`List[Tuple]` of length `batch_size`, *optional*):
                List of tuples corresponding to the requested final size (height, width) of each prediction. If unset,
                predictions will not be resized.

        Returns:
            semantic_segmentation: `List[torch.Tensor]` of length `batch_size`, where each item is a semantic
            segmentation map of shape (height, width) corresponding to the target_sizes entry (if `target_sizes` is
            specified). Each entry of each `torch.Tensor` correspond to a semantic class id.
        NzTMake sure that you pass in as many target sizes as the batch dimension of the logitsr   r   ZbilinearF)r5   modeZalign_cornersr	   c                    s   g | ]} | qS r<   r<   )rF   rW   Zsemantic_segmentationr<   r=   rG     rH   zPSemanticSegmentationMixin.post_process_semantic_segmentation.<locals>.<listcomp>)logitslenr:   rS   r   nnr'   ZinterpolateZ	unsqueezeZargmaxrT   rL   )r}   Zoutputsr   r   idxZresized_logitsZsemantic_mapr<   r   r=   "post_process_semantic_segmentation  s"   z<SemanticSegmentationMixin.post_process_semantic_segmentationr   )rc   rd   re   r   rl   r   r   r<   r<   r<   r=   r     s    r   r   )Fcollections.abcr   	functoolsr   r   typingr   r   r   r   numpynpZimage_processing_utilsr
   r   r   Zimage_transformsr   r   r   r   r   Zimage_utilsr   r   r   r   r   r   r   r   r   r   r   Zprocessing_utilsr   utilsr   r   r    r!   r"   r#   r$   r%   r   r&   Ztorchvision.transforms.v2r'   r   Ztorchvision.transformsZ
get_loggerrc   r   r;   rf   rk   rl   rj   ri   r>   rC   rJ   r   rP   arrayrZ   r[   rn   r   r<   r<   r<   r=   <module>   s   4$
	
*

   Z