o
    Zh$                     @   s   d Z ddlmZmZmZmZmZ ddlmZ ddl	m
Z
mZ ddlmZmZ ddlmZmZmZmZmZmZ ddlmZmZ g d	Zg d
Zdd ZG dd de
ZdgZdS )z"Image processor class for Idefics.    )CallableDictListOptionalUnion)Image   )BaseImageProcessorBatchFeature)resizeto_channel_dimension_format)ChannelDimension
ImageInputPILImageResamplingmake_list_of_imagesto_numpy_arrayvalid_images)
TensorTypeis_torch_available)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?c                 C   sB   | j dkr| S | d}td|jd}t||}|d}|S )NRGBZRGBA)   r   r   )modeconvertr   newsizealpha_composite)imageZ
image_rgba
backgroundr    r   c/var/www/auris/lib/python3.10/site-packages/transformers/models/idefics/image_processing_idefics.pyconvert_to_rgb&   s   


r    c                       s  e Zd ZdZdgZ						dded	eeee	e f  d
eeee	e f  dee de
deeef ddf fddZdddddddejfdedee deeeef  d	eeee	e f  d
eeee	e f  dee dee
 dee deeeef  defddZ  ZS )IdeficsImageProcessora  
    Constructs a Idefics image processor.

    Args:
        image_size (`int`, *optional*, defaults to 224):
            Resize to image size
        image_mean (`float` or `List[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can be
            overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IDEFICS_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
            Can be overridden by the `image_std` parameter in the `preprocess` method.
        image_num_channels (`int`, *optional*, defaults to 3):
            Number of image channels.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
    pixel_values   Nr   Tp?
image_size
image_mean	image_stdimage_num_channels
do_rescalerescale_factorreturnc                    sR   t  jdi | || _|| _|d ur|nt| _|d ur|nt| _|| _|| _	d S )Nr   )
super__init__r%   r(   IDEFICS_STANDARD_MEANr&   IDEFICS_STANDARD_STDr'   r)   r*   )selfr%   r&   r'   r(   r)   r*   kwargs	__class__r   r   r-   N   s   

zIdeficsImageProcessor.__init__images	transformreturn_tensorsc
                    s^  |dur|nj }|dur|nj} dur nj dur!nj|dur*|nj}dur3nj||ft|trGt|dkrGg S t	|}t
|sStddurpt s^tdddl}fdd|D }||S dd |D }dd |D }fd	d|D }fd
d|D } fdd|D }dd |D }td|i|	dd }|S )a|  
        Preprocess a batch of images.

        Args:
            images (`ImageInput`):
                A list of images to preprocess.
            image_size (`int`, *optional*, defaults to `self.image_size`):
                Resize to image size
            image_num_channels (`int`, *optional*, defaults to `self.image_num_channels`):
                Number of image channels.
            image_mean (`float` or `List[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
                Mean to use if normalizing the image. This is a float or list of floats the length of the number of
                channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can
                be overridden by the `image_mean` parameter in the `preprocess` method.
            image_std (`float` or `List[float]`, *optional*, defaults to `IDEFICS_STANDARD_STD`):
                Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
                number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess`
                method. Can be overridden by the `image_std` parameter in the `preprocess` method.
            transform (`Callable`, *optional*, defaults to `None`):
                A custom transform function that accepts a single image can be passed for training. For example,
                `torchvision.Compose` can be used to compose multiple transforms. If `None` - an inference mode is
                assumed - and then a preset of inference-specific transforms will be applied to the images
            do_rescale (`bool`, *optional*, defaults to `True`):
                Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
                the `preprocess` method.
            rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
                Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
                method.

        Returns:
            a PyTorch tensor of the processed images

        Nr   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.z.To pass in `transform` torch must be installedc                    s   g | ]} |qS r   r   .0x)r5   r   r   
<listcomp>       z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>c                 S      g | ]}t |qS r   )r    r7   r   r   r   r:      r;   c                 S   r<   r   )r   r7   r   r   r   r:      r;   c                    s   g | ]
}t | tjd qS ))Zresample)r   r   ZBICUBICr7   )r   r   r   r:          c                    s   g | ]	}j | d qS ))r   scale)Zrescale)r8   r   )r*   r0   r   r   r:      s    c                    s   g | ]
}j | d qS ))meanZstd)	normalizer7   )r&   r'   r0   r   r   r:      r=   c                 S   s   g | ]}t |tjqS r   )r   r   ZFIRSTr7   r   r   r   r:      s    r"   )dataZtensor_type)r%   r(   r&   r'   r)   r*   
isinstancelistlenr   r   
ValueErrorr   ImportErrortorchstackr
   )r0   r4   r(   r%   r&   r'   r5   r)   r*   r6   r1   rG   r   )r&   r'   r*   r0   r   r5   r   
preprocessa   s8   .
z IdeficsImageProcessor.preprocess)r#   NNr   Tr$   )__name__
__module____qualname____doc__Zmodel_input_namesintr   r   floatr   boolr-   r   ZPYTORCHr   r   strr   rI   __classcell__r   r   r2   r   r!   3   sj    
		
r!   N)rM   typingr   r   r   r   r   ZPILr   Zimage_processing_utilsr	   r
   Zimage_transformsr   r   Zimage_utilsr   r   r   r   r   r   utilsr   r   r.   r/   r    r!   __all__r   r   r   r   <module>   s     
