o
    ‡ZŽhò  ã                	   @   s 
  U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e j dd¡dkZdZdee fd	d
„Zdee fdd„Zdee fdd„Zdee fdd„Zdee fdd„Zdefdd„Ze j dd¡dkZdZdZdZeddddZ ee!d< eƒ Z"ee e!d< eƒ Z#ee e!d< dZ$ee!d< eƒ Z%ee e!d< eƒ Z&ee e!d< e j d ¡dkZ'ee!d!< dZ(ee) e!d"< d#Z*ed$ e!d%< d#Z+ed$ e!d&< e j d'd(¡dkZ,ee!d)< e j d*d¡dkZ-dZ.dZ/e j d+d¡dkZ0e j d,¡dkZ1e j d-d¡dkZ2dZ3dZ4dZ5e j d.d(¡dkZ6e j d/¡dkZ7e j d0d1¡Z8ed2 e!d3< dZ9dZ:eƒ Z;dZ<dZ=dZ>dZ?e	j@jAjBe!d4< dZCe	j@jAjBe!d5< dZDeee	jEjFgdf  e!d6< dZGeee	jEjFgdf  e!d7< dZHeee	jEjIjFgdf  e!d8< dZJeeeKd9 geKd9 f  e!d:< dZLdZMdZNdZOdZPi ZQeReSeReSef f e!d;< i ZTeReSeReSef f e!d<< dZUe j d=d¡dkZVdZWdZXdd>d?dd@œZYeReSef e!dA< dBZZedC e!dD< dZ[g dE¢Z\eKeeSeeKd9 geKd9 f f  e!dF< dZ]dGZ^dHZ_dIZ`eddJdKdLZaee!dM< e j dN¡dkZbe j dO¡dkZce j dP¡dkZddQZeee) e!dR< dZfeƒ r.dne j dS¡dkZge j dTdU¡ h¡ Zie j dVdW¡ h¡ Zje j dXdY¡ h¡ ZkedZ e!d[< e j d\d¡dkZld]Zme j d^¡dkZne j d_¡dkZoe j d`¡dkZpdaZqdbZrdcZse j dd¡dkZte j de¡dkZue j df¡dkZve)e j dgd¡ƒZwe j dhdi¡Zxe j djdk¡ZydleSdefdmdn„ZzdleSdefdodp„Z{dleSdefdqdr„Z|e j dsdY¡Z}e	j~jsØdnd(Z€e j dte€¡dkZe j dud(¡dkZ‚e j dvd¡dkZƒe j dw¡dkZ„dxZ…dyZ†dzZ‡dZˆdZ‰dZŠe j d{¡dkZ‹ee!d|< e j d}¡dkZŒee!d~< e j ddi¡Ze j d€d(¡dkZŽee!d< dQZe j d‚d¡dkZd?Z‘dƒZ’dzZ“dZ”dzZ•dZ–dZ—dZ˜e j d„d(¡dkZ™dZšdZ›dZœdZdZždZŸd?Z d?Z¡dZ¢dZ£dZ¤e j d…d(¡dkZ¥d†e	j¦v p‚d‡e	j¦v Z§eƒ pˆe§Z¨e j dˆd¡dkZ©dZªeeS e!d‰< dZ«eeS e!dŠ< deSfd‹dŒ„Z¬e¬ƒ Z­eSe!d< dZ®dIZ¯dŽdgZ°eKeed eSf  e!d‘< dZ±ee!d’< G d“d”„ d”ƒZ²defd•d–„Z³de)fd—d˜„Z´eƒ rådne´ƒ Zµee) e!d™< eeS e!dš< eƒ r'z d d›l¶m·Z· e¸re· ¹e jº »e¸ ¼dœe j½¡d¡¡Z¾ne· ¹d¡Z¾W n e¿eÀfy&   dZ¾Y nw dZ¾dQZÁe j džd¡dkZÂe j dŸd¡dkZÃdZÄdZÅd ZÆd¡ZÇdZÈdZÉdZÊee!d¢< e j d£d(¡dkZËdZÌdZÍdZÎdZÏe j d¤di¡ZÐeÐdikZÑeÐdkrrdineÐZÒe j d¥d¡ZÓeeS e!d¦< e j d§¡dkZÔdZÕe j d¨d(¡dkZÖee!d©< dZ×ee!dª< dZØee!d«< dZÙee!d¬< dZÚee!d­< dZÛee!d®< dZÜee!d¯< e j d°d(¡dkZÝe j d±d(¡dkZÞee!d²< G d³d´„ d´ƒZßG dµd¶„ d¶ƒZàG d·d¸„ d¸ƒZáG d¹dº„ dºƒZâG d»d¼„ d¼ƒZãd´Zäed½ e!d¾< d¶Zåed¿ e!dÀ< G dÁdÂ„ dÂƒZæG dÃdÄ„ dÄƒZçg dÅ¢ZèeKeS e!dÆ< g dÇ¢ZéeKeS e!dÈ< g ZêeKee	jëe	jëe	jëgdf  e!dÉ< G dÊdË„ dËƒZìerGd dÌlíT eejîeï ƒ dS )Íé    N)ÚAnyÚCallableÚLiteralÚOptionalÚTYPE_CHECKINGÚUnion)Ú	is_fbcode)ÚConfigÚget_tristate_envÚinstall_config_moduleZTORCHINDUCTOR_INPLACE_PADDINGÚ1FÚreturnc                   C   ó   t dƒS )NZ#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHE©r
   © r   r   úE/var/www/auris/lib/python3.10/site-packages/torch/_inductor/config.pyÚfx_graph_remote_cache_default   ó   r   c                   C   s,   t j d¡dkr
dS t j d¡dkrdS d S )NÚTORCHINDUCTOR_VEC_ISA_OKr   TÚ0F)ÚosÚenvironÚgetr   r   r   r   Úvec_isa_ok_default   s
   r   c                   C   r   )NZ#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   Úautotune_remote_cache_default   r   r   c                   C   r   )NZ+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   Ú%bundled_autotune_remote_cache_default   r   r   c                   C   s   t dtƒ sdƒS d ƒS )NZ/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   r   Ú)bundle_triton_into_fx_graph_cache_default#   s   þþr   c                  C   s@   d} dt jv rt j d¡dkS tƒ rd}tj |¡}|| kS dS )Nr   ZTORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionT)r   r   r   r   ÚtorchÚ_utils_internalÚjustknobs_getval_int)ZENABLE_PROLOGUE_FUSION_VERSIONÚjk_nameÚversionr   r   r   Úprologue_fusion_enabled*   s   
r"   Z"TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2Tz0pytorch/remote_cache:enable_local_fx_graph_cacheZTORCHINDUCTOR_FX_GRAPH_CACHE)ÚjustknobÚenv_name_forceÚdefaultÚfx_graph_cacheÚfx_graph_remote_cacheÚ!bundle_triton_into_fx_graph_cacheÚautotune_local_cacheÚautotune_remote_cacheÚbundled_autotune_remote_cacheZ"TORCHINDUCTOR_FORCE_DISABLE_CACHESÚforce_disable_cachesÚsleep_sec_TESTING_ONLYÚneeds_fixed_stride_order)r.   Zflexible_layoutÚ#custom_op_default_layout_constraintÚ'triton_kernel_default_layout_constraintZTORCHINDUCTOR_CPP_WRAPPERr   Úcpp_wrapperZTORCHINDUCTOR_ONLINE_SOFTMAXZTORCHINDUCTOR_SIZE_ASSERTSZTORCHINDUCTOR_NAN_ASSERTSZTORCHINDUCTOR_SCALAR_ASSERTSZTORCHINDUCTOR_MEMORY_PLANNINGZTORCHINDUCTOR_USE_FAST_MATHZTORCHINDUCTOR_MEMORY_POOLÚintermediates)Únoner2   ZoutputsÚcombinedÚmemory_poolÚpost_grad_custom_pre_passÚpost_grad_custom_post_passÚjoint_custom_pre_passÚjoint_custom_post_passÚpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNodeÚ_pre_fusion_custom_passÚpre_grad_fusion_optionsÚpost_grad_fusion_optionsZ"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-Cëâ6?é   )Zpre_gradÚ	precisionZnum_iterationsZrequires_optimizerÚfx_passes_numeric_checkÚ	heuristic)r%   ÚtritonZatenrA   Úmixed_mm_choice)Zreorder_compute_for_overlapZ
sink_waitsZraise_commsÚ'reorder_for_compute_comm_overlap_passesr%   i,  é   Z*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r%   r$   r#   Úuse_experimental_benchmarkerZTORCHINDUCTOR_MAX_AUTOTUNEZ$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISEZTORCHINDUCTOR_MAX_AUTOTUNE_GEMMé
   Úautotune_num_choices_displayedZ"TORCHINDUCTOR_FORCE_SAME_PRECISIONZ(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPZ(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITONZ,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEÚDEFAULT)rI   Z
EXHAUSTIVEÚmax_autotune_gemm_search_spaceZ'TORCHINDUCTOR_AUTOTUNE_FALLBACK_TO_ATENi    Z#TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHEZTORCHINDUCTOR_SAVE_ARGSZ!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g      ð?g       @Z#TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICEZ'TORCHINDUCTOR_COORDINATE_DESCENT_TUNINGZ5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONSZ'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSZ#TORCHINDUCTOR_AUTOHEURISTIC_COLLECTÚ ZTORCHINDUCTOR_AUTOHEURISTIC_USEZmixed_mmÚnamec                 C   s   t | ƒpt| ƒS )N)Úcollect_autoheuristicÚuse_autoheuristic©rL   r   r   r   Úrun_autoheuristicÀ  s   rP   c                 C   ó   | t jjj d¡v S ©Nú,)r   Ú	_inductorÚconfigÚautoheuristic_collectÚsplitrO   r   r   r   rM   Ä  ó   rM   c                 C   rQ   rR   )r   rT   rU   Úautoheuristic_userW   rO   r   r   r   rN   È  rX   rN   Z$TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATHZ!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONZTORCHINDUCTOR_FORCE_LAYOUT_OPTZ TORCHINDUCTOR_KEEP_OUTPUT_STRIDEZTORCHINDUCTOR_WARN_MIX_LAYOUTé   é   é   ZTORCHINDUCTOR_DEBUG_FUSIONÚdebug_fusionZTORCHINDUCTOR_BENCHMARK_FUSIONÚbenchmark_fusionZ#TORCHINDUCTOR_ENABLED_METRIC_TABLESZ(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONÚloop_ordering_after_fusionZ'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSIONé@   ZTORCHINDUCTOR_BENCHMARK_KERNELZ%TORCHINDUCTOR_EMULATE_PRECISION_CASTSÚdevÚgitZ0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORÚadd_pre_grad_passesÚremove_pre_grad_passesc                  C   s4   dt jv rt jd } nd} | dv sJ d| › ƒ‚| S )NZTORCHINDUCTOR_WORKER_STARTÚ
subprocess)re   ÚforkZspawnzInvalid start method: )r   r   )Zstart_methodr   r   r   Údecide_worker_start_methodo  s   

ürg   Úworker_start_methodZfuse_ddp_with_concat_opZschedule_comm_wait).NÚ_fuse_ddp_communication_passesÚ_micro_pipeline_tpc                   @   s&   e Zd ZU dZeed< dZeed< dS )Ú_collectiveFÚauto_selecti   Ú#one_shot_all_reduce_threshold_bytesN)Ú__name__Ú
__module__Ú__qualname__rl   ÚboolÚ__annotations__rm   Úintr   r   r   r   rk   ˜  s   
 rk   c                  C   s   d} d}t j |¡}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    r>   z0pytorch/inductor:enable_parallel_compile_version)r   r   r   )ZENABLE_PARALLEL_COMPILE_VERSIONr    r!   r   r   r   Ú#parallel_compile_enabled_internally  s   rt   c                  C   s¶   ddl } |  t¡}dtjv rttjd ƒ}| d|¡ |S tjdkr+d}| d¡ |S t	ƒ r:t
ƒ s:d}| d¡ |S ttd	ƒrFtt d¡ƒnt ¡ }|sNJ ‚td
|ƒ}| d|¡ |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NZTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envÚwin32r>   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodeÚsched_getaffinityé    zcompile_threads set to %d)ÚloggingÚ	getLoggerrn   r   r   rs   ÚinfoÚsysÚplatformr   rt   ÚhasattrÚlenrv   Ú	cpu_countÚmin)rx   ÚlogÚcompile_threadsr   r   r   r   Údecide_compile_threads«  s,   


ð
ó
ùÿý
rƒ   r‚   Úglobal_cache_dir)ÚparutilÚ.zfb/cacheZTORCHINDUCTOR_SHAPE_PADDINGZ#TORCHINDUCTOR_COMPREHENSIVE_PADDINGé€   i   Úforce_shape_padZTORCHINDUCTOR_PERMUTE_FUSIONZTORCHINDUCTOR_PROFILEZTORCHINDUCTOR_PROFILE_OUTPUTÚprofile_bandwidth_outputZ3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGZTORCHINDUCTOR_FREEZINGÚfreezingÚfreezing_discard_parametersÚdecompose_mem_bound_mmÚassume_aligned_inputsÚ.unsafe_ignore_unsupported_triton_autotune_argsÚ"check_stack_no_cycles_TESTING_ONLYÚ*always_complex_memory_overlap_TESTING_ONLYZ*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGZTORCHINDUCTOR_ANNOTATE_TRAININGÚannotate_trainingc                   @   sŒ  e Zd ZU dZej dd¡dkZej dd¡dkZdZ	e
e ed< eej dd	¡ƒZdej d
ejdkr6dnd¡fZeed ef ed< ej dd¡dkZej dd¡dkZdZe
e ed< dZe
e ed< edƒZe
e ed< dZeeed f ed< eej dd¡ƒZej dd¡dkZej dd¡dkZej dd¡Z ej dd¡dkZ!dZ"eej d d¡ƒZ#ej d!d¡Z$ej d"d¡Z%d#Z&dZ'dS )$ÚcppéÿÿÿÿZ$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   Z!TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   NÚsimdlenZ TORCHINDUCTOR_CPP_MIN_CHUNK_SIZEZ4096ÚCXXÚdarwinzclang++zg++ÚcxxZ'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILEZ TORCHINDUCTOR_CPP_WEIGHT_PREPACKÚinject_relu_bug_TESTING_ONLYÚinject_log1p_bug_TESTING_ONLYr   Ú
vec_isa_okÚoriginal_aten©r   r›   Zinductor_nodeÚdescriptive_namesZ,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZEZ16Z-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUMZ-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAGZ5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGÚoffZ)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICFZ#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICESZ%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKINGZ%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST)(rn   ro   rp   Úthreadsr   r   r   Zno_redundant_loopsZdynamic_threadsr”   r   rs   rr   Zmin_chunk_sizer{   r|   r—   Útupler   ÚstrZenable_kernel_profileZweight_prepackr˜   r™   r
   rš   rq   r   r   Zmax_horizontal_fusion_sizeZfallback_scatter_reduce_sumZenable_unsafe_math_opt_flagZ#enable_floating_point_contract_flagZenable_tiling_heuristicsZenable_grouped_gemm_templateZgemm_max_k_slicesZgemm_cache_blockingZgemm_thread_factorsZenable_loop_tail_vecZenable_concat_linearr   r   r   r   r’   g  sL   
 ÿþÿþ
ÿÿÿÿÿÿr’   c                   @   s‚  e Zd ZU ej d¡dkZdZdZdZ	dZ
eƒ rdndZdZdZee ed< dZdZdZdZdZdZdZdZdZeed	< dZdZd
Zee ed< dZeed< dZ dZ!ej dd¡dkZ"ej dd¡dkZ#dZ$e%ee&d f ed< ej dd¡dkZ'ej dd¡dkZ(dZ)eej dd¡ƒZ*e&d ed< dZ+dZ,dZ-dZ.eed< dZ/d
Z0ee1 ed< dZ2ej dd¡dkZ3ej dd¡dkZ4dZ5d
S )rB   ZTORCHINDUCTOR_CUDAGRAPHSr   TFr‡   é2   Ú"cudagraph_dynamic_shape_warn_limité   Úprefer_nd_tilingNÚautotune_at_compile_timeÚtile_reductionsZ!TORCHINDUCTOR_UNIQUE_KERNEL_NAMESZ&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr   r›   rœ   r   Z#TORCHINDUCTOR_PERSISTENT_REDUCTIONSZ$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSZTORCHINDUCTOR_MULTI_KERNEL)r   r>   r¤   é   Úmulti_kernelé   é   Úspill_thresholdr˜   ZENABLE_PERSISTENT_TMA_MATMULZTORCHINDUCTOR_SKIP_L1)6rn   ro   rp   r   r   r   Z
cudagraphsZcudagraph_treesZcudagraph_skip_dynamic_graphsZslow_path_cudagraph_assertsZ!cudagraph_trees_history_recordingr   Z cudagraph_support_input_mutationZ#cudagraph_unexpected_rerecord_limitr£   r   rs   rr   Zforce_cudagraph_syncZforce_cudagraphs_warmupZfast_path_cudagraph_assertsZskip_cudagraph_warmupZdebug_sync_graphZdebug_sync_kernelZdense_indexingZ	max_tilesr¥   rq   Zautotune_pointwiseZautotune_cublasLtr¦   r§   Z tiling_prevents_pointwise_fusionZ tiling_prevents_reduction_fusionZunique_kernel_namesZunique_user_kernel_namesr   r   r   Zpersistent_reductionsZcooperative_reductionsZforce_cooperative_reductionsr©   Zdivisible_by_16Zmin_split_scan_rblockZstore_cubinr¬   Zuse_block_ptrr˜   r¡   Zcodegen_upcast_to_fp32Zenable_persistent_tma_matmulZskip_l1_cacheZ.disallow_failing_autotune_kernels_TESTING_ONLYr   r   r   r   rB   Ô  sf   
 ÿ
ÿþ
ÿÿÿÿÿrB   c                   @   s0  e Zd ZU dZej dd¡dkZej dd¡dkZej dd¡Z	e
d ed< ej d	d
¡ZdZdZdZeed< dZeed< dZeed< dZeed< i Zeeef ed< ej dd¡dkZeed< ej dd¡dkZeed< eej dd¡ƒZeed< i Zeeef ed< dZeed< dZeed< dZeed< d
S )Úaot_inductorrK   ZAOT_INDUCTOR_DEBUG_COMPILEr   r   Z$AOT_INDUCTOR_COMPILE_WRAPPER_WITH_O0Z-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   Ú2Ú3Ú debug_intermediate_value_printerZ&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFÚuse_runtime_constant_foldingÚforce_mmap_weightsÚpackageÚpackage_cpp_onlyÚmetadataZ/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATIONÚ#raise_error_on_ignored_optimizationZDUMP_AOTI_MINIFIERÚdump_aoti_minifierZAOTINDUCTOR_REPRO_LEVELr¤   Úrepro_levelÚpresetsÚallow_stack_allocationÚuse_minimal_arrayref_interfaceTÚpackage_constants_in_so) rn   ro   rp   Zoutput_pathr   r   r   Zdebug_compileZcompile_wrapper_with_O0r°   r   rr   Zfiltered_kernel_namesZserialized_in_specZserialized_out_specr±   rq   r²   r³   r´   rµ   Údictr¡   r¶   r·   rs   r¸   r¹   r   rº   r»   r¼   r   r   r   r   r­   ‡  s4   
 ÿ	ÿÿ
ÿr­   c                
   @   s  e Zd ZU dZee ed< dZee ed< dZe	d ed< dZ
dZdZdZej dej ej ej ej¡d	¡¡¡ZdZee ed
< g d¢Zee ed< dZee ed< dZeed< ej dd¡dkZeed< ej d¡Z ee ed< ej d¡Z!ee ed< ej dd¡Z"eed< dS )ÚcudaNÚarchr!   ú-O1)ú-O0rÀ   ú-O2ú-O3z-OSÚcompile_opt_levelFZTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Úcutlass_max_profiling_configs)r>   r¤   rZ   Ú%cutlass_max_profiling_swizzle_optionsÚcuda_cxxr>   Úcutlass_backend_min_gemm_sizeZ/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   Úgenerate_test_runnerZTORCHINDUCTOR_CUTLASS_ALLOWLISTÚcutlass_op_allowlist_regexZTORCHINDUCTOR_CUTLASS_DENYLISTÚcutlass_op_denylist_regexZ)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELÚcutlass_instantiation_level)#rn   ro   rp   r¿   r   r¡   rr   r!   rÄ   r   Zenable_cuda_ltoZenable_ptxas_infoZenable_debug_infoÚuse_fast_mathr   r   r   ÚpathÚabspathÚjoinÚdirnamer   Ú__file__Zcutlass_dirrÅ   rs   rÆ   ÚlistrÇ   rÈ   rÉ   rq   rÊ   rË   rÌ   r   r   r   r   r¾   á  s:   
 ÿþ

ÿÿÿ
ÿr¾   c                   @   sÈ   e Zd ZU g Zee ed< ddgZee ed< dZe	d ed< dZ
dZd	Zd	ZdZd
Zee ed< ej d¡Zej dd¡dkZeed< d
Zee ed< dZeed< d
Zeee  ed< dZeed< d
S )Úrocmr¿   Zgfx90aZgfx942Úck_supported_archrÂ   )	rÁ   rÀ   rÂ   rÃ   z-Osz-Ozz-Ominz-Ofastz-OmaxrÄ   FTNÚ	rocm_homeZTORCHINDUCTOR_CK_DIRZ-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   rÉ   Ún_max_profiling_configsÚuse_preselected_instancesÚkBatch_sweepr«   Úsplit_k_threshold)rn   ro   rp   r¿   rÓ   r¡   rr   rÕ   rÄ   r   Zis_debugZ
save_tempsrÍ   Zflush_denormalsZprint_kernel_resource_usagerÖ   r   r   r   r   Zck_dirrÉ   rq   r×   rs   rØ   rÙ   rÚ   r   r   r   r   rÔ   ;  s(   
 þÿ
ÿrÔ   )r’   rB   ÚhalideÚcpu_backend)rB   rÛ   Úcuda_backendc                   @   sB   e Zd ZU dZdZdZed ed< dZed ed< dZ	dZ
dZd	S )
rÛ   Úhostz	host-cudaÚAnderson2021)rß   ZLi2018Ú	Adams2019ZMullapudi2016Úscheduler_cudarà   Úscheduler_cpuFN)rn   ro   rp   Z
cpu_targetZ
gpu_targetrá   r   rr   râ   ZassertsÚdebugZscan_kernelsr   r   r   r   rÛ   |  s   
 ÿÿrÛ   c                   @   sÔ   e Zd ZU ej dd¡dkZej dd¡dkZdZe	e
 ed< dZdZdZdZdZdZdZej d	d¡dkZej d
d¡dkZej dd¡Zej dd¡ZdZdZe	ee
gdf  ed< dZeed< dZeed< dS )ÚtraceZTORCH_COMPILE_DEBUGr   r   ZTORCH_COMPILE_DEBUG_SAVE_REALNÚ	debug_dirFTZINDUCTOR_POST_FUSION_SVGZINDUCTOR_ORIG_FX_SVGZINDUCTOR_DOT_GRAPH_SHAPE_SVGZ INDUCTOR_LOG_URL_FOR_GRAPH_XFORMÚ
upload_tarÚlog_autotuning_resultsÚ1log_inductor_triton_kernel_to_post_grad_node_info)rn   ro   rp   r   r   r   ÚenabledZsave_real_tensorsrå   r   r¡   rr   Ú	debug_logZinfo_logZfx_graphZfx_graph_transformedZir_pre_fusionZir_post_fusionZoutput_codeZgraph_diagramZdraw_orig_fx_graphZdot_graph_shapeZlog_url_for_graph_xformZcompile_profileræ   r   rç   rq   rè   r   r   r   r   rä   ˜  s&   
 
rä   )ztrace.upload_tarr8   r9   r:   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierÚ_save_config_ignore)rä   zcuda.cutlass_dirrh   r‚   r7   r6   r   Ú_cache_config_ignore_prefixÚexternal_matmulc                   @   sR   e Zd ZU dZeed< dZee ed< dZ	dZ
ee ed< dZee ed< dZdS )Útest_configsFÚ%force_extern_kernel_in_multi_templateNÚmax_mm_configsÚautotune_choice_name_regexÚautotune_choice_desc_regex)rn   ro   rp   rï   rq   rr   rð   r   rs   Zruntime_triton_dtype_assertrñ   r¡   rò   Z*graphsafe_rng_func_ignores_fallback_randomr   r   r   r   rî   ø  s   
 rî   )Ú*)ðr   r{   Útypingr   r   r   r   r   r   r   Z!torch._inductor.custom_graph_passZtorch._environmentr   Ztorch.utils._config_moduler	   r
   r   r   r   Zinplace_paddingZcan_inplace_pad_graph_inputrq   r   r   r   r   r   r"   Zenable_auto_functionalized_v2rã   Zdisable_progressZverbose_progressr&   rr   r'   r(   r)   r*   r+   r,   r-   rs   r/   r0   r1   Zonline_softmaxZdceZstatic_weight_shapesZsize_assertsZnan_assertsZscalar_assertsZpick_loop_ordersZinplace_buffersZallow_buffer_reuseZmemory_planningrÍ   r5   Zbenchmark_harnessZepilogue_fusionZprologue_fusionZepilogue_fusion_firstZpattern_matcherZb2b_gemm_passr6   rT   Zcustom_graph_passZCustomGraphPassTyper7   r8   ZfxZGraphr9   r:   Úgraphr;   rÓ   Zsplit_cat_fx_passesZ efficient_conv_bn_eval_fx_passesZis_predispatchZgroup_fusionZbatch_fusionr<   r½   r¡   r=   Zreorder_for_localityZdynamic_scale_rblockZforce_fuse_int_mm_with_mulZuse_mixed_mmr@   rC   Z reorder_for_compute_comm_overlaprD   Zreorder_for_peak_memoryZestimate_op_runtimeZintra_node_bwZinter_node_bwrF   Zmax_autotuneZmax_autotune_pointwiseZmax_autotune_gemmrH   Zgraph_partitionZforce_same_precisionÚupperZmax_autotune_gemm_backendsZmax_autotune_conv_backendsrJ   Zautotune_fallback_to_atenZunbacked_symint_fallbackZsearch_autotune_cacheZ	save_argsZautotune_in_subprocZ+max_autotune_subproc_result_timeout_secondsZ-max_autotune_subproc_graceful_timeout_secondsZ.max_autotune_subproc_terminate_timeout_secondsZautotune_multi_deviceZcoordinate_descent_tuningZ'coordinate_descent_check_all_directionsZ coordinate_descent_search_radiusrV   rY   rP   rM   rN   Zautoheuristic_log_pathr!   ZhipZlayout_opt_defaultZlayout_optimizationZforce_layout_optimizationZkeep_output_strideZwarn_mix_layoutZrealize_reads_thresholdZrealize_opcount_thresholdZrealize_acc_reads_thresholdZfallback_randomZimplicit_fallbacksZaggressive_fusionr]   r^   Zenabled_metric_tablesr_   Zscore_fusion_memory_thresholdZbenchmark_epilogue_fusionZ max_epilogue_benchmarked_choicesZmax_fusion_sizeZmax_pointwise_cat_inputsZforce_pointwise_catZunroll_reductions_thresholdZcomment_originZconv_1x1_as_mmZsplit_reductionsZbenchmark_kernelZconstant_and_index_propagationZalways_keep_tensor_constantsZassert_indirect_indexingZcompute_all_boundsZcombo_kernelsZbenchmark_combo_kernelZcombo_kernels_autotuneZcombo_kernel_allow_mixed_sizesZ#combo_kernel_foreach_dynamic_shapesZjoint_graph_constant_foldingZdebug_index_assertsZemulate_precision_castsÚ__version__Zis_nightly_or_sourceZdeveloper_warningsZ"optimize_scatter_upon_const_tensorrc   rd   rg   rh   Z_fuse_ddp_communicationZ_fuse_ddp_bucket_sizeri   rj   rk   rt   rƒ   r‚   Zlibfb.pyr…   Ú__package__Zget_dir_pathrÎ   rÐ   ÚreplaceÚsepr„   Ú
ValueErrorÚImportErrorZkernel_name_max_opsZshape_paddingZcomprehensive_paddingZpad_channels_lastZdisable_padding_cpuZpadding_alignment_bytesZpadding_stride_thresholdZpad_outputsZbw_outputs_user_visiblerˆ   Zpermute_fusionZprofiler_mark_wrapper_callZgenerate_intermediate_hooksZdebug_ir_tracebackZ_raise_error_for_testingZ_profile_varZprofile_bandwidthZprofile_bandwidth_regexr‰   Z/profile_bandwidth_with_do_bench_using_profilingZdisable_cpp_codegenrŠ   r‹   rŒ   r   rŽ   r   r   Zenable_linear_binary_foldingr‘   r’   rB   r­   r¾   rÔ   rÜ   rÝ   rÛ   rä   rë   rì   rí   ZTensorrî   Ztorch.utils._config_typingÚmodulesrn   r   r   r   r   Ú<module>   s>  
  ÿý
ÿ	þÿþÿÿ 
ûÿÿÿüøÿÿÿÿÿýÿÿþÿþÿþÿÿÿÿÿÿ
ÿÿÿ	ÿþ%ÿ
€ÿÿÿÿÿm 4ZZ;C
$