o
    Zh+                     @  s  d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZ d dlmZ d dlmZ e	r>d dlmZ ejed	Zejed
Zeee
eejf   Zeee gef ZejddG dd dZ ejddG dd dZ!ejddG dd dZ"d`ddZ#daddZ$dbddZ%dcd!d"Z&ddd%d&Z'ded+d,Z(dfd2d3Z)dgd5d6Z*dhd9d:Z+dhd;d<Z,did?d@Z-ejG dAdB dBZ.djdJdKZ/dkdMdNZ0G dOdP dPeZ1dldUdVZ2dmd\d]Z3ejddG d^d_ d_Z4dS )n    )annotationsN)Enum)AnyCallableOptionalTYPE_CHECKINGUnion)countersget_metrics_context)	InputType)
OrderedSet)SequenceZ
perf_hintsZcudagraph_static_inputsT)frozenc                   @  s   e Zd ZU dZded< dS )
FunctionIDz9Unique counter of a function wrapped in cudagraphify_implintidN__name__
__module____qualname____doc____annotations__ r   r   N/var/www/auris/lib/python3.10/site-packages/torch/_inductor/cudagraph_utils.pyr      s   
 r   c                   @  s2   e Zd ZU dZded< ded< ded< ded< d	S )
PlaceholderInfoz
    A serializable version of torch.fx.Node that contains information
    pertinent to placeholder stack traces. We use these in logging and error messages
    related to cudagraphs, and will cache these results.
    strnameOptional[str]stack_tracelist[PlaceholderInfo]usersmutating_use_stack_traceNr   r   r   r   r   r   #   s   
 r   c                   @  sB   e Zd ZU dZded< ded< ded< ded	< d
ed< ded< dS )WrappedFunctionz
    Represents a function that you want to record for CUDA graph replay,
    with a little more metadata so we can identify if we have an applicable
    CUDA graph in our CUDA graph tree for it.
    zCallable[..., Any]modelSequence[int]static_input_idxsr   r   ztuple[torch.Tensor, ...]	constantsSequence[PlaceholderInfo]placeholdersmutated_input_idxsNr   r   r   r   r   r"   2   s   
 r"   placeholder_nodetorch.fx.Nodereturnr   c                 C  sb   t | jdkrtt| jjdd S | jD ]}|jtjj	j
jkr.|jdd  }r.|  S qd S )N   r   )lenr    nextitermetagettargettorchopsZatenZcopy_default)r*   user   r   r   r   &get_mutating_use_stack_trace_from_nodeB   s   
r8   placeholder_infoc                 C  s   | j S N)r!   )r9   r   r   r   get_mutating_use_stack_traceQ   s   r;   c                 C  sL   | j }| jdd }g }d }| jdkrdd | jD }t| }t||||S )Nr   placeholderc                 S  s   g | ]}t |qS r   )to_placeholder_info.0ir   r   r   
<listcomp>\       z'to_placeholder_info.<locals>.<listcomp>)r   r1   r2   opr    r8   r   )r*   r   r   r    r!   r   r   r   r=   U   s   
r=   graphtorch.fx.Graphr   c                 C  s   dd | j D S )Nc                 S  s   g | ]}|j d krt|qS )r<   )rC   r=   )r?   noder   r   r   rA   e   s    z(get_placeholder_info.<locals>.<listcomp>)nodes)rD   r   r   r   get_placeholder_infod   s   rH   reasonr   c                 C  s
   d|  S )Nzskipping cudagraphs due to r   )rI   r   r   r   format_default_skip_messagej   s   
rJ   r(   r'   mutation_indicesr$   c                 C  sP   d}|D ]}| | }t | }r nqtdt| d}|r&| d| S |S )N zmutated inputs (z instances). Found from : 
 )r;   rJ   r.   )r(   rK   r   idxr<   msgr   r   r   get_mutation_stack_tracen   s   rP   funcinputslist[InputType]is_cuda_graph_recorded_tensorCallable[[torch.Tensor], bool]c                   sZ   t jjjjr fdd jD }n j}td j td| |r+t	 j
|S d S )Nc                   s&   g | ]}| j v s| s|qS r   )r%   r?   rN   rQ   rR   rT   r   r   rA      s    

z&check_for_mutation.<locals>.<listcomp>z'check mutation static input indices: %sz#check mutation mutation indices: %s)r4   	_inductorconfigtritoncudagraph_treesr)   static_inputs_logdebugr%   rP   r(   )rQ   rR   rT   rK   r   rW   r   check_for_mutation   s   	r^   rF   c                 C  s*   | j D ]}|jdd  }r|  S qd S )Nr   )r    r1   r2   )rF   r7   r   r   r   r   _get_use_stack_trace   s
   
r_   device_node_mapping!dict[torch.device, torch.fx.Node]c                 C  s   |  td }r$d|j d}t| }r t| d| S t|S t| dkr7tt| 	 j
dkr7d S dd | 	 D }td	d
| S )Ncpuzcpu device ()rM   r-   cudac                 s  s    | ]}t |V  qd S r:   )repr)r?   keyr   r   r   	<genexpr>   s    z:check_multiple_devices_or_any_cpu_nodes.<locals>.<genexpr>zmultiple devices: z, )r2   r4   Zdevicer   r_   rJ   r.   r/   r0   keystypejoin)r`   Zcpu_noderO   r   Z	keys_reprr   r   r   'check_multiple_devices_or_any_cpu_nodes   s   rk   c                 C  s   t | S r:   )rk   )r`   r   r   r    check_lowering_disable_cudagraph   s   rl   rO   Nonec                 C  sD   t |  td d  d7  < t }| r |jd| dd d S d S )NZinductorZcudagraph_skipsr-   Zcudagraph_skip_reasonT)	overwrite)perf_hint_logwarningr	   r
   Zin_progressset)rO   Zmetrics_contextr   r   r   #log_cudagraph_skip_and_bump_counter   s   
rr   c                   @  s    e Zd ZU ded< d	ddZdS )
BoxedDeviceIndexOptional[int]value
device_idxr,   rm   c                 C  s    |d u st |tsJ || _d S r:   )
isinstancer   ru   )selfrv   r   r   r   rq      s   
zBoxedDeviceIndex.setN)rv   rt   r,   rm   )r   r   r   r   rq   r   r   r   r   rs      s   
 rs   gmtorch.fx.GraphModulemutated_inputsOrderedSet[str]r)   OrderedSet[int]r%   c                   sn   t d}tjjjjr+t|  fdd|D }t|dk}|s!d S t| j	}t
||S t|dk}|s5d S |S )Nzmutated inputsc                   s   g | ]}| vr|qS r   r   rV   Zunique_idxsr   r   rA      s    zGcheck_for_mutation_ignore_cuda_graph_managed_tensor.<locals>.<listcomp>r   )rJ   r4   rX   rY   rZ   r[   r   r.   rH   rD   rP   )ry   r{   r)   r%   Zdefault_msgrK   Zhas_mutationr(   r   r~   r   3check_for_mutation_ignore_cuda_graph_managed_tensor   s   

r   r<   c                 C  s,   | j r| j S | jD ]
}|j r|j   S q	dS )zM
    Gets the first non-empty stack trace of a placeholder or its users.
    N)r   r    )r<   userr   r   r   get_placeholder_stack_trace   s   

r   c                   @  s&   e Zd ZdZdZdZdZd
ddZd	S )CheckInvariantStatusr-            r,   r   c                 C  s<   | j dkrdS | j dkrdS | j dkrdS | j  d| j S )NCudagraphManagedIdxMismatchz-cudagraph managed tensor data pointer changedStaticInputIdxMismatchz!static input data pointer changed&ExpectedDeadIndicesBeforeGraphMismatchz+expected dead indices before graph are livez: )r   ru   )rx   r   r   r   __str__  s   


zCheckInvariantStatus.__str__Nr,   r   )r   r   r   SUCCESSr   r   r   r   r   r   r   r   r      s    r   recorded_data_ptrSequence[Optional[int]]target_idxsmismatchc                   s   t  t krt  t | ksJ d fdd|D }fdd|D }| d}tt||D ]2\}\}	}
t|	tjs@J || }|	 |
krd| | }| d|j d|
 d|	  d	t| d

}q2|S )z}
    Logs the mismatch between input data pointers and recorded data pointers.
    This checks only idxs in target_idxs.
    zClength mismatch between inputs, recorded_data_ptr, and placeholdersc                      g | ]} | qS r   r   r>   )rR   r   r   rA     rB   z)log_data_ptr_mismatch.<locals>.<listcomp>c                   r   r   r   r>   )r   r   r   rA      rB   z.
zinput name: z. data pointer changed from z to z. input stack trace: 
)	r.   	enumerateziprw   r4   Tensordata_ptrr   r   )r(   rR   r   r   r   Z	t_tensorsZt_data_ptrs	error_msgr@   Ztensorr   indexr<   r   )rR   r   r   log_data_ptr_mismatch  s*   "
r   fn_cache)dict[tuple[int, ...], Callable[..., Any]]new_int_keyr   boolc                   sN   t |  d  d fdd}tjjjjr% tjjjjkr%t|  dS dS )	Nr-   r,   r   c                     s   d  dS )NzCUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed a0   distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.r   r   Znum_cudagraphsr   r   warn_msg5  s   z4maybe_warning_due_to_dynamic_shape.<locals>.warn_msgTFr   )	r.   rh   r4   rX   rY   rZ   Z"cudagraph_dynamic_shape_warn_limitro   rp   )r   r   r   r   r   r   "maybe_warning_due_to_dynamic_shape/  s   

r   c                   @  s*   e Zd ZU dZded< ded< ded< dS )	CudagraphCachedInfoz'
    Info needed to realign inputs
    r'   r(   zlist[Optional[str]]Zstack_tracesz	list[str]Zcudagraph_fail_reasonsNr   r   r   r   r   r   L  s
   
 r   )r*   r+   r,   r   )r9   r   r,   r   )r*   r+   r,   r   )rD   rE   r,   r   )rI   r   r,   r   )r(   r'   rK   r$   r,   r   )rQ   r"   rR   rS   rT   rU   r,   r   )rF   r+   r,   r   )r`   ra   r,   r   )rO   r   r,   rm   )
ry   rz   r{   r|   r)   r}   r%   r$   r,   r   )r<   r   r,   r   )r(   r'   rR   rS   r   r   r   r$   r   r   r,   r   )r   r   r   r   r,   r   )5
__future__r   dataclassesenumr   typingr   r   r   r   r   r4   Ztorch._dynamo.utilsr	   r
   Ztorch._inductor.utilsr   Ztorch.utils._ordered_setr   collections.abcr   Z_loggingZgetArtifactLoggerr   ro   r\   listr   r   Z
OutputTypeZ	ModelType	dataclassr   r   r"   r8   r;   r=   rH   rJ   rP   r^   r_   rk   rl   rr   rs   r   r   r   r   r   r   r   r   r   r   <module>   sT   


















