o
    Zha:                     @   s  U d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
 ddlZe	eegef  ed< ej r:ddlmZ ndZe
ejeedf Zi Zeeef ed< i Zeeef ed	< G d
d dZG dd dZG dd deZe	eegef  ed< ej rddlmZ ndZG dd deZeG dd dZ G dd deZ!G dd deZ"i Z#eee$e f ed< da%de
eejf de$e fddZ&de
eejf d e$e fd!d"Z'd ee(ee$e f  fd#d$Z)d%d& Z*dS )'a  
Device abstraction layer for TorchDynamo and Inductor backends.

This module provides a unified interface for different hardware backends (CUDA, XPU,
CPU, MPS) through a common device interface. Key components include:

- DeviceInterface: Base class defining the common API for all device types
- Device-specific implementations: CudaInterface, XpuInterface, CpuInterface, MpsInterface
- Device registration system for managing available backends
- Worker APIs for multi-processing scenarios
- Stream and event management across different devices
- Device property caching for worker processes

The abstraction layer enables device-agnostic code in TorchDynamo while allowing
specialized implementations for each hardware backend's unique features.
    N)Iterable)	dataclass)AnyCallableOptionalUnionget_cuda_stream)_cuda_getCurrentRawStream caching_worker_device_propertiescaching_worker_current_devicesc                   @   s  e Zd ZdZG dd dZG dd dZG dd dZG dd	 d	Zed
d Z	ede
fddZededefddZededefddZedd ZedefddZedejfddZedd ZedejfddZededed efd!d"Zed#edefd$d%Zed6de
fd'd(Zed6de
fd)d*Zed6de
fd+d,Zed7d.efd/d0Ze	-d7d1ejd.edefd2d3Zed6de
defd4d5Zd&S )8DeviceInterfacez
    This is a simple device runtime interface for Inductor. It enables custom
    backends to be integrated with Inductor in a device-agnostic semantic.
    c                   @   s   e Zd ZdefddZdS )zDeviceInterface.devicedevicec                 C      t NNotImplementedErrorclsr    r   M/var/www/auris/lib/python3.10/site-packages/torch/_dynamo/device_interface.py__new__0   s   zDeviceInterface.device.__new__N)__name__
__module____qualname__	_device_tr   r   r   r   r   r   /   s    r   c                   @      e Zd Zdd ZdS )zDeviceInterface.Eventc                 O      t d)NzYEvent should be inherited from torch.Event, otherwise, it couldn't be captured by dynamo.r   r   argskwargsr   r   r   r   4      zDeviceInterface.Event.__new__Nr   r   r   r   r   r   r   r   Event3       r"   c                   @   r   )zDeviceInterface.Streamc                 O   r   )Nz[Stream should be inherited from torch.Stream, otherwise, it couldn't be captured by dynamo.r   r   r   r   r   r   :   r    zDeviceInterface.Stream.__new__Nr!   r   r   r   r   Stream9   r#   r$   c                   @   sH   e Zd ZdZedefddZedefddZeddefd	d
Z	dS )zDeviceInterface.Workera  
        Worker API to query device properties that will work in multi processing
        workers that cannot use the GPU APIs (due to processing fork() and
        initialization time issues). Properties are recorded in the main process
        before we fork the workers.
        r   c                 C   r   r   r   r   r   r   r   
set_deviceG      z!DeviceInterface.Worker.set_devicereturnc                   C   r   r   r   r   r   r   r   current_deviceK   r'   z%DeviceInterface.Worker.current_deviceNc                 C   r   r   r   r%   r   r   r   get_device_propertiesO   r'   z,DeviceInterface.Worker.get_device_propertiesr   )
r   r   r   __doc__staticmethodintr&   r)   r   r*   r   r   r   r   Worker?   s    r.   c                   C   r   r   r   r   r   r   r   r)   S   r'   zDeviceInterface.current_devicec                 C   r   r   r   r%   r   r   r   r&   W   r'   zDeviceInterface.set_devicer(   c                 C   r   r   r   r%   r   r   r   maybe_exchange_device[   r'   z%DeviceInterface.maybe_exchange_devicec                 C   r   r   r   r%   r   r   r   exchange_device_   r'   zDeviceInterface.exchange_devicec                   C   r   r   r   r   r   r   r   device_countc   r'   zDeviceInterface.device_countc                   C   r   r   r   r   r   r   r   is_availableg   r'   zDeviceInterface.is_availablestreamc                 C   r   r   r   r3   r   r   r   r3   k   r'   zDeviceInterface.streamc                   C   r   r   r   r   r   r   r   current_streamo   r'   zDeviceInterface.current_streamc                 C   r   r   r   r4   r   r   r   
set_streams   r'   zDeviceInterface.set_stream	stream_iddevice_indexdevice_typec                 C   r   r   r   )r7   r8   r9   r   r   r   _set_stream_by_idw   r'   z!DeviceInterface._set_stream_by_id
device_idxc                 C   r   r   r   r;   r   r   r   get_raw_stream{   r'   zDeviceInterface.get_raw_streamNc                 C   r   r   r   r%   r   r   r   synchronize   r'   zDeviceInterface.synchronizec                 C   s   | j |S r   )r.   r*   r   r   r   r   r*         z%DeviceInterface.get_device_propertiesc                 C   r   r   r   r%   r   r   r   get_compute_capability   r'   z&DeviceInterface.get_compute_capabilityFincluding_emulationc                 C   r   r   r   rA   r   r   r   is_bf16_supported   r'   z!DeviceInterface.is_bf16_supporteddtypec                 C   s   |t jkp	| |S r   )torchbfloat16rC   r   rD   rA   r   r   r   is_dtype_supported   s   z"DeviceInterface.is_dtype_supportedc                 C   r   r   r   r%   r   r   r   memory_allocated   r'   z DeviceInterface.memory_allocatedr   F) r   r   r   r+   r   r"   r$   r.   r,   r)   r   r&   r-   r/   r0   r1   boolr2   rE   r3   r5   r6   r:   r=   r>   classmethodr*   r@   rC   rD   rH   rI   r   r   r   r   r   )   s^    


r   c                   @   sL   e Zd ZdZdee dee ddfddZdd	 Z	d
e
de
de
fddZdS )DeviceGuarda_  
    This class provides a context manager for device switching. This is a stripped
    down version of torch.{device_name}.device.

    The context manager changes the current device to the given device index
    on entering the context and restores the original device on exiting.
    The device is switched using the provided device interface.
    device_interfaceindexr(   Nc                 C   s   || _ || _d| _d S )N)rN   idxprev_idx)selfrN   rO   r   r   r   __init__   s   
zDeviceGuard.__init__c                 C   s"   | j d ur| j| j | _d S d S r   )rQ   rN   r0   rR   )rS   r   r   r   	__enter__   s   
zDeviceGuard.__enter__typevalue	tracebackc                 C   s   | j d ur| j| j| _ dS NF)rQ   rN   r/   rR   )rS   rV   rW   rX   r   r   r   __exit__   s   
zDeviceGuard.__exit__)r   r   r   r+   rV   r   r   r-   rT   rU   r   rZ   r   r   r   r   rM      s    	
rM   c                   @   s   e Zd ZejjZejjZejjZG dd dZe	ejj
Z
e	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	eZe	ejjZe	ejjZe	ejjZe	ejjZe	defddZe	d
defdd	ZdS )CudaInterfacec                   @   D   e Zd ZedefddZedefddZed
defdd	ZdS )zCudaInterface.Workerr   c                 C      | t d< d S Ncudar   r%   r   r   r   r&      r?   zCudaInterface.Worker.set_devicer(   c                   C      dt v rt d S tj S r^   )r   rE   r_   r)   r   r   r   r   r)         
z#CudaInterface.Worker.current_deviceNc                 C      | d urt | trt| } | jdksJ t | tjr| j} | d u r'tj } dt	vr;dd t
tj D }|t	d< t	d |  S )Nr_   c                 S      g | ]}t j|qS r   )rE   r_   r*   .0ir   r   r   
<listcomp>       
z>CudaInterface.Worker.get_device_properties.<locals>.<listcomp>)
isinstancestrrE   r   rV   rO   r[   r.   r)   r
   ranger_   r1   r   Zdevice_propr   r   r   r*         


z*CudaInterface.Worker.get_device_propertiesr   	r   r   r   r,   r-   r&   r)   r   r*   r   r   r   r   r.          r.   r(   c                   C   
   t j S r   )rE   r_   r2   r   r   r   r   r2         
zCudaInterface.is_availableNr   c                 C   sB   t jjd u rt j| \}}|d | S t j| jddd S )N
   :   r   )rE   versionZhipr_   get_device_capabilityr*   ZgcnArchNamesplit)r   majorminr   r   r   r@      s   z$CudaInterface.get_compute_capabilityr   )r   r   r   rE   r_   r   r"   r$   r.   r,   r)   r&   r1   r3   r5   r6   r:   r>   r*   r   r=   _exchange_devicer0   _maybe_exchange_devicer/   rI   rC   rK   r2   r   r@   r   r   r   r   r[      s.    r[   get_xpu_stream)_xpu_getCurrentRawStreamc                   @   s  e Zd ZejjZejjZejjZG dd dZe	ejj
Z
e	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	ejjZe	eZe	ejjZe	ejjZe	ejjZe	defddZe	ddefdd	Ze	ddedefddZdS )XpuInterfacec                   @   r\   )zXpuInterface.Workerr   c                 C   r]   Nxpur`   r%   r   r   r   r&     r?   zXpuInterface.Worker.set_devicer(   c                   C   ra   r   )r   rE   r   r)   r   r   r   r   r)   
  rb   z"XpuInterface.Worker.current_deviceNc                 C   rc   )Nr   c                 S   rd   r   )rE   r   r*   re   r   r   r   rh     ri   z=XpuInterface.Worker.get_device_properties.<locals>.<listcomp>)rj   rk   rE   r   rV   rO   r   r.   r)   r
   rl   r   r1   rm   r   r   r   r*     rn   z)XpuInterface.Worker.get_device_propertiesr   ro   r   r   r   r   r.     rp   r.   r(   c                   C   rq   r   )rE   r   r2   r   r   r   r   r2   3  rr   zXpuInterface.is_availableNr   c                 C   s   t j| }|S r   )rE   r   rw   )r   ccr   r   r   r@   7  s   z#XpuInterface.get_compute_capabilityFrA   c                 C   rq   r   )rE   r   rC   rB   r   r   r   rC   <  rr   zXpuInterface.is_bf16_supportedr   rJ   )r   r   r   rE   r   r   r"   r$   r.   r,   r)   r&   r1   r3   r5   r6   r:   r>   r*   r}   r=   r{   r0   r|   r/   rI   rK   r2   r   r@   rC   r   r   r   r   r      s0    r   c                   @   s   e Zd ZU eed< dS )CpuDevicePropertiesZmulti_processor_countN)r   r   r   r-   __annotations__r   r   r   r   r   A  s   
 r   c                   @   s   e Zd ZG dd dejZedefddZeddefdd	Zedde	de
fddZedefddZedd Zedde	fddZG dd dZd
S )CpuInterfacec                   @   s.   e Zd Zd
ddZdefddZddd	ZdS )zCpuInterface.EventTc                 C   s
   d| _ d S )Ng        time)rS   Zenable_timingr   r   r   rT   H  s   
zCpuInterface.Event.__init__r(   c                 C   s   |j | j  d S )Ni  r   )rS   Z	end_eventr   r   r   elapsed_timeK  s   zCpuInterface.Event.elapsed_timeNc                 C   s   t  | _ d S r   )r   perf_counter)rS   r3   r   r   r   recordN  s   zCpuInterface.Event.record)Tr   )r   r   r   rT   floatr   r   r   r   r   r   r"   G  s    
r"   r(   c                   C      dS NTr   r   r   r   r   r2   Q  r'   zCpuInterface.is_availableFrA   c                 C   r   r   r   rB   r   r   r   rC   U  r'   zCpuInterface.is_bf16_supportedNr   c                 C   r   N r   r%   r   r   r   r@   Y  r'   z#CpuInterface.get_compute_capabilityc                 C   r   Nr   r   r<   r   r   r   r=   ]  r'   zCpuInterface.get_raw_streamc                   C   r   r   r   r   r   r   r   r)   a  r'   zCpuInterface.current_devicec                 C   s   d S r   r   r%   r   r   r   r>   e  r'   zCpuInterface.synchronizec                   @   s    e Zd ZeddefddZdS )zCpuInterface.WorkerNr   c                 C   s   dd l }| }t|S r   )multiprocessing	cpu_countr   )r   r   r   r   r   r   r*   j  s   z)CpuInterface.Worker.get_device_propertiesr   )r   r   r   r,   r   r*   r   r   r   r   r.   i  s    r.   rJ   r   )r   r   r   rE   r"   r,   rK   r2   rC   r   rk   r@   r-   r=   r)   r>   r.   r   r   r   r   r   F  s    

r   c                   @   s   e Zd ZeddedefddZe	ddejdedefddZ	edefd	d
Z
edd ZeddedefddZeddefddZG dd dZdS )MpsInterfaceFrA   r(   c                 C   s   t jjddS )N   r   )rE   backendsmpsZis_macos_or_newerrB   r   r   r   rC   s  s   zMpsInterface.is_bf16_supportedrD   c                 C   s"   |t jkrdS |t jkp| |S rY   )rE   Zfloat64rF   rC   rG   r   r   r   rH   w  s   
zMpsInterface.is_dtype_supportedc                   C   s   t jj S r   )rE   r   r   r2   r   r   r   r   r2     r?   zMpsInterface.is_availablec                   C   r   r   r   r   r   r   r   r)     r'   zMpsInterface.current_deviceNr   c                 C   r   r   r   r%   r   r   r   r@     r'   z#MpsInterface.get_compute_capabilityc                 C   s   t j  d S r   )rE   r   r>   r%   r   r   r   r>     s   zMpsInterface.synchronizec                   @   s,   e Zd ZeddefddZedd ZdS )zMpsInterface.WorkerNr   c                 C   s   i S r   r   r%   r   r   r   r*     r'   z)MpsInterface.Worker.get_device_propertiesc                   C   r   r   r   r   r   r   r   r)     r'   z"MpsInterface.Worker.current_devicer   )r   r   r   r,   r   r*   r)   r   r   r   r   r.     s
    r.   rJ   r   )r   r   r   r,   rK   rC   rL   rE   rD   rH   r2   r)   r   rk   r@   r>   r.   r   r   r   r   r   r  s*    
r   device_interfacesFr   rN   c                 C   s   t | tjr	| j} |t| < d S r   )rj   rE   r   rV   r   )r   rN   r   r   r   register_interface_for_device  s   r   r(   c                 C   s:   t | tjr	| j} tst  | tv rt|  S td|  )NzNo interface for device )rj   rE   r   rV   _device_initializedinit_device_regr   r   r%   r   r   r   get_interface_for_device  s   r   c                   C   s   t st  t S r   )r   r   r   itemsr   r   r   r    get_registered_device_interfaces  s   r   c                  C   sx   t dt ttj D ]
} t d|  t qt dt ttj D ]
} t d|  t q#t dt t dt	 da
d S )Nr_   zcuda:r   zxpu:cpur   T)r   r[   rl   rE   r_   r1   r   r   r   r   r   )rg   r   r   r   r     s   



r   )+r+   r   collections.abcr   dataclassesr   typingr   r   r   r   rE   r-   r   r_   Z_is_compiledZtorch._Cr	   r   r   rk   r   r
   dictr   r   rM   r[   r   r~   r}   r   r   r   r   r   rV   r   r   r   tupler   r   r   r   r   r   <module>   sF   
qD
A,'
 
