"""
This module provides TVM backend integration for TorchDynamo.

Apache TVM is a deep learning compiler framework that can optimize and execute
models on various hardware backends. This module enables:

- Compilation of PyTorch models to TVM's computation graphs
- Multiple scheduling options:
  - Default scheduler
  - Auto-scheduler for automatic optimization
  - Meta-schedule for evolutionary search-based tuning
- Hardware-specific optimizations:
  - CUDA GPU support
  - CPU support with LLVM targeting and architecture-specific tuning
  - Automatic detection of CPU capabilities (AVX2, AVX512)
- Tensor conversion utilities between PyTorch and TVM formats
- Configurable optimization levels and tuning trials
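- Scheduler selection through the options mapping (keys: "scheduler", "trials",
  "opt_level") or through the TVM_SCHEDULER environment variable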

The backend can be used with torch.compile():
    model = torch.compile(model, backend="tvm")
    N)MappingProxyType)Optional   )device_from_inputsfake_tensor_unsupported)register_backend N     )	schedulertrials	opt_level)optionsc                   sz  dd l ddl m} ddlm} tj| |}t|}dd t|D }| | }t	|dkrlt
d | jS |j||\}	}
|jdkr|j}j }nd}jt }|dd }|d u rtjd	d }|d
d}|dd}|dkr6ddl m} t }tj|s||	d |
|\}}t	|dkr|||}tj|s|dksjJ |j ||!|gdd}z|"| W n. t#y   tj|rt$|  Y n0 |%|V j&j'|ddid  |j(|	||
d}W d    n1 s0    Y  W d    n1 s(0    Y  n
|dkrddl m)} t* ~}|jdkrjt  d|j+j,dd }|dksJ |j-j.|	|||d|
d|d}|j-j/||	||
|d}W d    n1 s0    Y  nZ|d ks|s8j&j'|d!  |j(|	||
d}W d    n1 s,0    Y  nt0d"|1|d  | d#d$ fd%d& fd'd(}|S ))Nr   )relay)graph_executorc                 S   s    g | ]\}}d | |j fqS )inp_)shape).0idxi r   H/var/www/auris/lib/python3.9/site-packages/torch/_dynamo/backends/tvm.py
<listcomp>;       ztvm.<locals>.<listcomp>z0Explicitly fall back to eager due to zero outputcudar
   ZTVM_SCHEDULERr   r   r   r	   auto_scheduler)r   maini  )Znum_measure_trialsZmeasure_callbacksZearly_stoppingz relay.backend.use_auto_schedulerT)r   config)targetparamsmeta_schedule)r   z --num-cores F)logical@   Zevolutionary)modr   work_dirZmax_trials_globalZnum_trials_per_iterr   Zstrategyr   )databaser"   r   r   r   default)r   zThis tuning option is invalid/not implemented for torchdynamo's TVM-related backend. There are three available options: default, auto_scheduler and meta_schedule.c                 S   s*   | j dkrt|  S tjj|  S )z8A helper function to transfer a NDArray to torch.tensor.bool)dtypetorchZ
from_numpynumpyutilsZdlpackfrom_dlpackZ	to_dlpack)Z	nd_tensorr   r   r   to_torch_tensor   s    
ztvm.<locals>.to_torch_tensorc                    s,   | j tjkr  j|   S  j| S )z8A helper function to transfer a torch.tensor to NDArray.)r'   r(   r&   Zndarraycpur)   r+   )Ztorch_tensor)tvmr   r   to_tvm_tensor   s    ztvm.<locals>.to_tvm_tensorc                     s   dd | D }   \}}dd | D }t|dD ]R\}}| dkr6|jrX| }d| }||vrxtd| q6 || q6 	   fddt
  D S )	Nc                 S   s   g | ]}|  qS r   )
contiguous)r   ar   r   r   r      r   z)tvm.<locals>.exec_tvm.<locals>.<listcomp>c                 S   s   h | ]\}}|qS r   r   )r   name_r   r   r   	<setcomp>   r   z(tvm.<locals>.exec_tvm.<locals>.<setcomp>r   r   z6input %s skipped as not found in tvm's runtime libraryc                    s   g | ]}  |qS r   )Z
get_output)r   r   )mr,   r   r   r      r   )Zget_input_infoitems	enumerateZdimZrequires_graddetachlogwarningZ	set_inputrunrangeZget_num_outputs)Zi_argsargsZ
shape_infor4   Zactive_inputsr   argZinp_name)r6   r,   r0   r   r   exec_tvm   s(    
ztvm.<locals>.exec_tvm)2r/   r   Ztvm.contribr   r(   Zjittracer   r8   lenr:   r;   ZforwardZfrontendZfrom_pytorchtyper   indexr   r.   ZTargetllvm_targetgetosenvironr   tempfileNamedTemporaryFilepathexistsZextract_tasksZTaskSchedulerZTuningOptionsZRecordToFileZtune	ExceptionunlinkZApplyHistoryBestZ	transformZPassContextbuildr   TemporaryDirectoryr*   	cpu_countZrelay_integrationZ
tune_relayZcompile_relayNotImplementedErrorZGraphModule)ZgmZexample_inputsr   r   r   Zjit_modZdeviceZ
shape_listZexample_outputsr"   r   devr   r
   r   r   r   Zlog_fileZtasksZtask_weightsZtunerZtune_optionlibmsr#   r$   r@   r   )r6   r,   r0   r/   r   r/   +   s    







T


(2	r/   r   )r
   r   c                   C   s*   zt d W dS  ty$   Y dS 0 d S )Nr/   TF)	importlibimport_moduleImportErrorr   r   r   r   has_tvm   s
    
rY   c                  C   s2   t jdkr.td } d| v r"dS d| v r.dS dS )Nlinuxz/proc/cpuinfoZavx512zllvm -mcpu=skylake-avx512Zavx2zllvm -mcpu=core-avx2Zllvm)sysplatformopenread)Zcpuinfor   r   r   rE      s    
rE   )__doc__	functoolsrV   loggingrG   r[   rI   typesr   typingr   r(   commonr   r   registryr   	getLogger__name__r:   r/   partialZtvm_meta_scheduleZtvm_auto_schedulerrY   cacherE   r   r   r   r   <module>   s4   

 
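# Usage sketch (illustrative only, not executed on import): beyond the plain
# backend="tvm" call shown in the module docstring, the scheduler, trial budget
# and optimization level can be overridden through torch.compile's "options"
# argument. This assumes a torch build that forwards "options" to custom
# backends whose signature accepts it (recent versions do), and a TVM install
# that ships the chosen tuning package; "model" stands in for any nn.Module.
#
#     compiled = torch.compile(
#         model,
#         backend="tvm",
#         options={"scheduler": "meta_schedule", "trials": 20000, "opt_level": 3},
#     )
#
# Alternatively, set the TVM_SCHEDULER environment variable to
# "auto_scheduler", "meta_schedule" or "default" before compiling.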