a
    h/Q                     @   s  d dl Z d dlZd dlmZ d dlmZmZ d dlZddlm	Z	 ddl
mZ ze	d d	ZW n eefyv   d
ZY n0 eZed dZG dd dZG dd dZeeef ddddZejejejejejejedddZejejeeef ejdddZdd	d d d d ded	d d defeeeeeeeeeef eeeeeeef eeejejef dddZeeee ee ef dddZeedd d!Zd1ejeeeeeeeeef eeeeeeeef eeeejejf d"d#d$Z ejeee ee ef d%d&d'Z!ejed%d(d)Z"d2eeeef eeeef  eeejeje#eef f d+d,d-Z$d3eeeeee ee f ee f d.d/d0Z%dS )4    N)Fraction)OptionalUnion   )_load_library   ) _raise_video_deprecation_warningvideo_readerTFc                   @   s0   e Zd ZeedZddgZeeddddZdS )Timebase	numeratordenominatorr   r   N)r   r   returnc                 C   s   || _ || _d S )Nr   )selfr   r    r   G/var/www/auris/lib/python3.9/site-packages/torchvision/io/_video_opt.py__init__   s    zTimebase.__init__)__name__
__module____qualname__int__annotations__	__slots__r   r   r   r   r   r
      s   
r
   c                	   @   s8   e Zd ZeeeeeeeedZg dZddddZdS )VideoMetaData)	has_videovideo_timebasevideo_duration	video_fps	has_audioaudio_timebaseaudio_durationaudio_sample_rateN)r   c                 C   s@   d| _ tdd| _d| _d| _d| _tdd| _d| _d| _d S )NFr   r   g        )	r   r
   r   r   r   r   r   r    r!   )r   r   r   r   r   ;   s    zVideoMetaData.__init__)	r   r   r   boolr
   floatr   r   r   r   r   r   r   r   %   s   
r   )	pts_ranger   c                 C   s@   | d | d   krdkr<n nt d| d  d| d  d S )Nr   r   z=Start pts should not be smaller than end pts, got start pts: z and end pts: )
ValueError)r$   r   r   r   _validate_ptsF   s     r&   )	vtimebasevfps	vduration	atimebaseasample_rate	adurationr   c                 C   s$  t  }|  dkrvtt| d  t| d  |_| d  t| d   }| dkrvd|_t| | |_| dkrt| |_	| dkrtt|d  t|d  |_
|d  t|d   }| dkrd|_t| | |_| dkr t| |_|S )zE
    Build update VideoMetaData struct with info about the video
    r   r   T)r   numelr
   r   itemr   r#   r   r   r   r   r   r    r!   )r'   r(   r)   r*   r+   r,   metaZtimebaser   r   r   
_fill_infoN   s$    $$r0   )aframes
aframe_ptsaudio_pts_ranger   c           	      C   s   |d |d  }}|  d}t|| d t| }d}|}||d k r\t|d | | }|d dkr||d krt|d | | }| ||d d f S )Nr   r   )sizer#   r   )	r1   r2   r3   startendZnum_samplesZstep_per_aframeZs_idxZe_idxr   r   r   _align_audio_framesn   s    
r8         ?r   r4   )filenameseek_frame_marginread_video_streamvideo_widthvideo_heightvideo_min_dimensionvideo_max_dimensionvideo_pts_ranger   read_audio_streamaudio_samplesaudio_channelsr3   r   r   c                 C   s   t   t| t| tjj| |d||||||d |d |j|j|	|
||d |d |j|j}|\
}}}}}}}}}}t||||||}|	 dkrt
|||}|||fS )ab  
    Reads a video from a file, returning both the video frames and the audio frames

    Args:
    filename (str): path to the video file
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise. Thus,
        when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase (Fraction, optional): a Fraction rational number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase (Fraction, optional): a Fraction rational number which denotes time base in audio stream

    Returns
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of audio_channels
        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float)
            and audio_fps (int)
    r   r   )r   r&   torchopsr	   read_video_from_filer   r   r0   r-   r8   )r;   r<   r=   r>   r?   r@   rA   rB   r   rC   rD   rE   r3   r   resultvframes_vframe_ptsr'   r(   r)   r1   r2   r*   r+   r,   infor   r   r   _read_video_from_file}   s:    @rM   )r;   r   c                 C   s~   t jj| dddddddddddddddddd}|\
}}}}}}}}	}
}t||||	|
|}|  }|  }|||fS )z
    Decode all video- and audio frames in the video. Only pts
    (presentation timestamp) is returned. The actual frame pixel data is not
    copied. Thus, it is much faster than read_video(...)
    r   r   r4   )rF   rG   r	   rH   r0   numpytolist)r;   rI   _vframes
vframe_ptsr'   r(   r)   _aframesr2   r*   r+   r,   rL   r   r   r    _read_video_timestamps_from_file   s4    rS   c           	      C   s:   t   tjj| }|\}}}}}}t||||||}|S )zO
    Probe a video file and return VideoMetaData with info about the video
    )r   rF   rG   r	   Zprobe_video_from_filer0   )	r;   rI   r'   r(   r)   r*   r+   r,   rL   r   r   r   _probe_video_from_file  s
    rT   )
video_datar<   r=   r>   r?   r@   rA   rB   video_timebase_numeratorvideo_timebase_denominatorrC   rD   rE   r3   audio_timebase_numeratoraudio_timebase_denominatorr   c                 C   s   t   t| t| t| tjsht . tjddd tj| tj	d} W d   n1 s^0    Y  tj
j| |d||||||d |d ||	|
|||d |d ||}|\
}}}}}}}}}}| dkrt|||}||fS )a  
    Reads a video from memory, returning both the video frames as the audio frames
    This function is torchscriptable.

    Args:
    video_data (data type could be 1) torch.Tensor, dtype=torch.int8 or 2) python bytes):
        compressed video content stored in either 1) torch.Tensor 2) python bytes
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise.
        Thus, when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase_numerator / video_timebase_denominator (float, optional): a rational
        number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio audio_channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase_numerator / audio_timebase_denominator (float, optional):
        a rational number which denotes time base in audio stream

    Returns:
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of channels
    ignore The given buffer is not writablemessageZdtypeNr   r   )r   r&   
isinstancerF   Tensorwarningscatch_warningsfilterwarnings
frombufferuint8rG   r	   read_video_from_memoryr-   r8   )rU   r<   r=   r>   r?   r@   rA   rB   rV   rW   rC   rD   rE   r3   rX   rY   rI   rJ   rK   r'   r(   r)   r1   r2   r*   r+   r,   r   r   r   _read_video_from_memory  s@    E
.rg   )rU   r   c                 C   s   t | tjsRt . tjddd tj| tjd} W d   n1 sH0    Y  tjj	
| dddddddddddddddddd}t  |\
}}}}}}}}	}
}t||||	|
|}|  }|  }|||fS )	z
    Decode all frames in the video. Only pts (presentation timestamp) is returned.
    The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
    is much faster than read_video(...)
    rZ   r[   r\   r^   Nr   r   r4   )r_   rF   r`   ra   rb   rc   rd   re   rG   r	   rf   r   r0   rN   rO   )rU   rI   rP   rQ   r'   r(   r)   rR   r2   r*   r+   r,   rL   r   r   r   "_read_video_timestamps_from_memoryz  s>    
.rh   c           	      C   s   t   t| tjsXt . tjddd tj| tjd} W d   n1 sN0    Y  tj	j
| }|\}}}}}}t||||||}|S )zy
    Probe a video in memory and return VideoMetaData with info about the video
    This function is torchscriptable
    rZ   r[   r\   r^   N)r   r_   rF   r`   ra   rb   rc   rd   re   rG   r	   Zprobe_video_from_memoryr0   )	rU   rI   r'   r(   r)   r*   r+   r,   rL   r   r   r   _probe_video_from_memory  s    
.ri   pts)r;   	start_ptsend_ptspts_unitr   c              	      s   t    d u rtd dkr(td t| }|j}|j} fdd}d}t}	|rrt|j	j
|j	j}	||	}d}
t}|rt|jj
|jj}||}
t| d||	d|
|d\}}}i }|r|j|d	< |r|j|d
< |||fS )Ninfrj   mThe pts_unit 'pts' gives wrong results and will be removed in a follow-up version. Please use pts_unit 'sec'.c                    s`   } }dkrHt td|   }|tdkrHt t d|   }|tdkrXd}||fS )Nsecr   rn   r4   )r   mathfloorr#   ceil)Z	time_baseZstart_offset
end_offsetrl   rm   rk   r   r   get_pts  s    z_read_video.<locals>.get_ptsr:   T)r=   rB   r   rC   r3   r   r   Z	audio_fps)r   r#   ra   warnrT   r   r   default_timebaser   r   r   r   r   rM   r   r!   )r;   rk   rl   rm   rL   r   r   rv   rB   r   r3   r   rJ   r1   _infor   ru   r   _read_video  sH    	

rz   )r;   rm   r   c                    sj   t   |dkrtd t| \}}}|dkrRt|jj|jj  fdd|D }|jr^|j	nd }||fS )Nrj   ro   rp   c                    s   g | ]}|  qS r   r   ).0xZvideo_time_baser   r   
<listcomp>      z*_read_video_timestamps.<locals>.<listcomp>)
r   ra   rw   rS   r   r   r   r   r   r   )r;   rm   rj   _rL   r   r   r}   r   _read_video_timestamps  s    r   )r9   r   r   r   r   r   r:   r   r   r   r   r   r:   r   r   )r   Nrj   )rj   )&rq   ra   Z	fractionsr   typingr   r   rF   	extensionr   Z_video_deprecation_warningr   Z_HAS_CPU_VIDEO_DECODERImportErrorOSErrorZ_HAS_VIDEO_OPTrx   r
   r   tupler   r&   r`   r0   r8   strr#   r"   rM   listrS   rT   rg   rh   ri   dictrz   r   r   r   r   r   <module>   s   

!	!

a"#               

o,   
? 