a
    ‘º”hÉ ã                	   @   s  U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e j dd¡dkZdZee dœd	d
„Zee dœdd„Zee dœdd„Zee dœdd„Zee dœdd„Zedœdd„Zedœdd„Ze j dd¡dkZdZdZdZ dZ!e"e#d< eddddZ$ee#d< eƒ Z%ee e#d< eƒ Z&ee e#d < ed!d"ddZ'ee#d#< dZ(ee#d$< eƒ Z)ee e#d%< eƒ Z*ee e#d&< ed'd(ddZ+ee#d)< dZ,ee#d*< i Z-e.e/e/f e#d+< dZ0ee" e#d,< d-Z1ed. e#d/< e j d0d1¡dkZ2ee#d2< e j d3d1¡dkZ3ee#d4< eƒ  Z4ee#d5< e j d6d¡dkZ5dZ6dZ7e j d7d¡dkZ8e j d8¡dkZ9e j d9d¡dkZ:e j d:eƒ r”d1nd¡dkZ;dZ<dZ=dZ>e j d;d1¡dkZ?e j d<¡dkZ@dZAe j d=d>¡ZBed? e#d@< dZCdZDeƒ ZEdZFdZGdZHdZIe	jJjKjLe#dA< dZMe	jJjKjLe#dB< dZNeee	jOjPgdf  e#dC< dZQeee	jOjPgdf  e#dD< dZReee	jOjSjPgdf  e#dE< dZTeeeUdF geUdF f  e#dG< dZVeeeUdF geUdF f  e#dH< dZWdZXdZYdZZdZ[i Z\e.e/e.e/ef f e#dI< i Z]e.e/e.e/ef f e#dJ< dZ^e j dKd¡dkZ_dZ`dZaddLdMddNœZbe.e/ef e#dO< dPZcedQ e#dR< dZdg dS¢ZeeUee/eeUdF geUdF f f  e#dT< dZfee" e#dU< dZgdVZhdWZidXZjeddYdZd[Zkee#d\< e j d]¡dkZle j d^¡dkZme j d_¡dkZne j d`¡dkZodaZpee" e#db< dZqeƒ r(dne j dc¡dkZre j ddde¡ s¡ Zte j dfdg¡ s¡ Zue j dhdi¡ s¡ Zvedj e#dk< e j dldi¡ s¡ Zwedj e#dm< dZxdnZye j do¡dkZze j dp¡dkZ{e j dq¡dkZ|drZ}dsZ~dsZe j dt¡dkZ€e j du¡dkZe j dv¡dkZ‚e"e j dwd¡ƒZƒe j dxdy¡Z„e j dzd{¡Z…e/ed|œd}d~„Z†e/ed|œdd€„Z‡e/ed|œdd‚„Zˆe j dƒdi¡Z‰e	jŠj‹s†dnd1ZŒe j d„eŒ¡dkZe j d…d1¡dkZŽe j d†d¡dkZe j d‡¡dkZdˆZ‘d‰Z’dŠZ“dZ”dZ•e j d‹¡dkZ–dZ—e j dŒ¡dkZ˜ee#d< e j dŽ¡dkZ™ee#d< e j ddy¡Zše j d‘d1¡dkZ›ee#d’< daZœe j d“d¡dkZdMZžd”ZŸd”Z dŠZ¡dZ¢dŠZ£dZ¤dZ¥dZ¦e"e j d•d ¡ƒZ§e j d–d1¡dkZ¨dZ©dZªdZ«dZ¬dZ­dZ®dMZ¯dMZ°dZ±dZ²dZ³e j d—d1¡dkZ´d˜e	jµv p d™e	jµv Z¶eƒ pe¶Z·e j dšd¡dkZ¸dZ¹ee/ e#d›< dZºee/ e#dœ< e/dœddž„Z»e»ƒ Z¼e/e#dŸ< ed d¡ddZ½ee#d¢< dZ¾dXZ¿d£d¤gZÀeUeed¥ e/f  e#d¦< dZÁee#d§< G d¨d©„ d©ƒZÂedœdªd«„ZÃe"dœd¬d­„ZÄeƒ rÜdneÄƒ ZÅee" e#d®< eƒ ZÆee#d¯< ed°d±ddZÇee#d²< e j d³d1¡dkZÈee#d´< ee/ e#dµ< eƒ 	r z@d d¶lÉmÊZÊ eË	rteÊ Ìe jÍ ÎeË Ïd·e jÐ¡d¸¡¡ZÑn
eÊ Ìd¸¡ZÑW n eÒeÓf	yœ   dZÑY n0 ndZÑdaZÔe j d¹d¡dkZÕe j dºd¡dkZÖdZ×dZØd»ZÙd¼ZÚdZÛdZÜdZÝee#d½< e j d¾d1¡dkZÞdZßdZàdZádZâe j d¿dy¡ZãeãdykZäeãdk
r6dyneãZåe j dÀd¡Zæee/ e#dÁ< e j dÂ¡dkZçdZèe j dÃd1¡dkZéee#dÄ< dZêee#dÅ< dZëee#dÆ< dZìee#dÇ< dZíee#dÈ< dZîee#dÉ< dZïee#dÊ< e j dËd1¡dkZðe j dÌd1¡dkZñee#dÍ< dZòee#dÎ< G dÏdÐ„ dÐƒZóG dÑdÒ„ dÒƒZôG dÓdÔ„ dÔƒZõG dÕdÖ„ dÖƒZöG d×dØ„ dØƒZ÷dÐZøedÙ e#dÚ< dÒZùedÛ e#dÜ< G dÝdÞ„ dÞƒZúG dßdà„ dàƒZûg dá¢ZüeUe/ e#dâ< g dã¢ZýeUe/ e#dä< g ZþeUee	jÿe	jÿe	jÿgdf  e#då< G dædç„ dçƒZ erðd dèlT eeje ƒ dS )éé    N)ÚAnyÚCallableÚLiteralÚOptionalÚTYPE_CHECKINGÚUnion)Ú	is_fbcode)ÚConfigÚget_tristate_envÚinstall_config_moduleZTORCHINDUCTOR_INPLACE_PADDINGÚ1F)Úreturnc                   C   s   t dƒS )NZ#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHE©r
   © r   r   úD/var/www/auris/lib/python3.9/site-packages/torch/_inductor/config.pyÚfx_graph_remote_cache_default   s    r   c                   C   s,   t j d¡dkrdS t j d¡dkr(dS d S )NÚTORCHINDUCTOR_VEC_ISA_OKr   TÚ0F)ÚosÚenvironÚgetr   r   r   r   Úvec_isa_ok_default   s
    r   c                   C   s   t dƒS )NZ#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   Úautotune_remote_cache_default   s    r   c                   C   s   t dƒS )NZ+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   Ú%bundled_autotune_remote_cache_default   s    r   c                   C   s   t dtƒ sdnd ƒS )NZ/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   r   Ú)bundle_triton_into_fx_graph_cache_default#   s    þr   c                  C   s@   d} dt jv rt j d¡dkS tƒ r8tj d¡}|| kS dS d S )Né   Z&TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionT©r   r   r   r   ÚtorchÚ_utils_internalÚjustknobs_getval_int)ZSTATIC_CUDA_LAUNCHER_VERSIONÚversionr   r   r   Ústatic_cuda_launcher_default*   s    
ÿr!   c                  C   sD   d} dt jv rt j d¡dkS tƒ r<d}tj |¡}|| kS dS d S )Nr   ZTORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr   )ZENABLE_PROLOGUE_FUSION_VERSIONÚjk_namer    r   r   r   Úprologue_fusion_enabled9   s    
r#   Z"TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2Ti  Úprecompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cacheZTORCHINDUCTOR_FX_GRAPH_CACHE)ÚjustknobÚenv_name_forceÚdefaultÚfx_graph_cacheÚfx_graph_remote_cacheÚ!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2Z-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITEÚnon_blocking_remote_cache_writeÚautotune_local_cacheÚautotune_remote_cacheÚbundled_autotune_remote_cachez)pytorch/remote_cache:force_disable_cachesZ"TORCHINDUCTOR_FORCE_DISABLE_CACHESÚforce_disable_cachesÚ&unsafe_skip_cache_dynamic_shape_guardsÚ!unsafe_marked_cacheable_functionsÚsleep_sec_TESTING_ONLYÚneeds_fixed_stride_order)r3   Zflexible_layoutÚ'triton_kernel_default_layout_constraintZTORCHINDUCTOR_CPP_WRAPPERr   Úcpp_wrapperZ(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEÚcpp_wrapper_build_separateÚcpp_cache_precompile_headersZTORCHINDUCTOR_ONLINE_SOFTMAXZTORCHINDUCTOR_SIZE_ASSERTSZTORCHINDUCTOR_NAN_ASSERTSZTORCHINDUCTOR_SCALAR_ASSERTSZTORCHINDUCTOR_ALIGNMENT_ASSERTSZTORCHINDUCTOR_MEMORY_PLANNINGZTORCHINDUCTOR_USE_FAST_MATHZTORCHINDUCTOR_MEMORY_POOLÚintermediates)Únoner8   ÚoutputsZcombinedÚmemory_poolÚpost_grad_custom_pre_passÚpost_grad_custom_post_passÚjoint_custom_pre_passÚjoint_custom_post_passÚpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNodeÚ_pre_fusion_custom_passÚ_post_fusion_custom_passÚpre_grad_fusion_optionsÚpost_grad_fusion_optionsZ"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-Cëâ6?r   )Zpre_gradZ	precisionZnum_iterationsZrequires_optimizerÚfx_passes_numeric_checkÚ	heuristic)r'   ÚtritonZatenrF   Úmixed_mm_choice)Zreorder_compute_for_overlapZ
sink_waitsZraise_commsÚ'reorder_for_compute_comm_overlap_passesÚreorder_prefetch_limitr'   i,  é   Z*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r'   r&   r%   Úuse_experimental_benchmarkerZTORCHINDUCTOR_MAX_AUTOTUNEZ$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISEZTORCHINDUCTOR_MAX_AUTOTUNE_GEMMZ!TORCHINDUCTOR_DISABLE_DECOMPOSE_Ké
   Úautotune_num_choices_displayedZ"TORCHINDUCTOR_FORCE_SAME_PRECISIONZ(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPZ(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITONZ,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEÚDEFAULT)rO   Z
EXHAUSTIVEÚmax_autotune_gemm_search_spaceZ,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEÚmax_autotune_flex_search_spacei    Z#TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHEZTORCHINDUCTOR_SAVE_ARGSZ!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        Z#TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICEZ'TORCHINDUCTOR_COORDINATE_DESCENT_TUNINGZ5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONSZ'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSZ#TORCHINDUCTOR_AUTOHEURISTIC_COLLECTÚ ZTORCHINDUCTOR_AUTOHEURISTIC_USEZmixed_mm)Únamer   c                 C   s   t | ƒpt| ƒS )N)Úcollect_autoheuristicÚuse_autoheuristic©rS   r   r   r   Úrun_autoheuristic  s    rW   c                 C   s   | t jjj d¡v S ©Nú,)r   Ú	_inductorÚconfigÚautoheuristic_collectÚsplitrV   r   r   r   rT     s    rT   c                 C   s   | t jjj d¡v S rX   )r   rZ   r[   Úautoheuristic_user]   rV   r   r   r   rU   
  s    rU   Z$TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATHZ!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONZTORCHINDUCTOR_FORCE_LAYOUT_OPTZ TORCHINDUCTOR_KEEP_OUTPUT_STRIDEZTORCHINDUCTOR_WARN_MIX_LAYOUTé   é   é   Z.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTZTORCHINDUCTOR_DEBUG_FUSIONÚdebug_fusionZTORCHINDUCTOR_BENCHMARK_FUSIONÚbenchmark_fusionZ#TORCHINDUCTOR_ENABLED_METRIC_TABLESZ(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONÚloop_ordering_after_fusionZ'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSIONé@   ZTORCHINDUCTOR_MIN_NUM_SPLITZTORCHINDUCTOR_BENCHMARK_KERNELZ%TORCHINDUCTOR_EMULATE_PRECISION_CASTSÚdevÚgitZ0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORÚadd_pre_grad_passesÚremove_pre_grad_passesc                  C   s4   dt jv rt jd } nd} | dv s0J d| › ƒ‚| S )NZTORCHINDUCTOR_WORKER_STARTÚ
subprocess)rj   ÚforkÚspawnzInvalid start method: )r   r   )Zstart_methodr   r   r   Údecide_worker_start_method½  s    

ürm   Úworker_start_methodz(pytorch/compiler:worker_suppress_loggingZ%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGÚworker_suppress_loggingZfuse_ddp_with_concat_opZschedule_comm_wait).NÚ_fuse_ddp_communication_passesÚ_micro_pipeline_tpc                   @   s&   e Zd ZU dZeed< dZeed< dS )Ú_collectiveFÚauto_selecti   Ú#one_shot_all_reduce_threshold_bytesN)Ú__name__Ú
__module__Ú__qualname__rs   ÚboolÚ__annotations__rt   Úintr   r   r   r   rr   í  s   
rr   c                  C   s   d} d}t j |¡}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    r   z0pytorch/inductor:enable_parallel_compile_version)r   r   r   )ZENABLE_PARALLEL_COMPILE_VERSIONr"   r    r   r   r   Ú#parallel_compile_enabled_internallyò  s    r{   c                  C   s°   ddl } |  t¡}dtjv r8ttjd ƒ}| d|¡ nttjdkrRd}| d¡ nZt	ƒ rnt
ƒ snd}| d¡ n>ttd	ƒr†tt d¡ƒnt ¡ }|s–J ‚td
|ƒ}| d|¡ |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NZTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envÚwin32r   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodeÚsched_getaffinityé    zcompile_threads set to %d)ÚloggingÚ	getLoggerru   r   r   rz   ÚinfoÚsysÚplatformr   r{   ÚhasattrÚlenr}   Ú	cpu_countÚmin)r   ÚlogÚcompile_threadsr†   r   r   r   Údecide_compile_threads   s&    


ÿý
rŠ   r‰   Úuse_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernelsZ7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELSÚ)static_launch_user_defined_triton_kernelsZ)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERÚstrict_static_cuda_launcherÚglobal_cache_dir)ÚparutilÚ.zfb/cacheZTORCHINDUCTOR_SHAPE_PADDINGZ#TORCHINDUCTOR_COMPREHENSIVE_PADDINGé€   i   Úforce_shape_padZTORCHINDUCTOR_PERMUTE_FUSIONZTORCHINDUCTOR_PROFILEZTORCHINDUCTOR_PROFILE_OUTPUTÚprofile_bandwidth_outputZ3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGZTORCHINDUCTOR_FREEZINGÚfreezingÚfreezing_discard_parametersÚdecompose_mem_bound_mmÚassume_aligned_inputsÚ.unsafe_ignore_unsupported_triton_autotune_argsÚ"check_stack_no_cycles_TESTING_ONLYÚ*always_complex_memory_overlap_TESTING_ONLYZ*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGZTORCHINDUCTOR_ANNOTATE_TRAININGÚannotate_trainingÚ)enable_caching_generated_triton_templatesc                   @   sš  e Zd ZU dZej dd¡dkZej dd¡dkZdZ	e
e ed< eej dd	¡ƒZdej d
ejdkrldnd¡fZeed ef ed< ej dd¡dkZej dd¡dkZdZe
e ed< dZe
e ed< edƒZe
e ed< dZed ed< eej dd¡ƒZej dd¡dkZej dd¡dkZej dd¡Zej dd¡dkZ dZ!eej d d¡ƒZ"ej d!d¡Z#ej d"d¡Z$d#Z%dZ&ej d$d¡dkZ'dZ(dS )%ÚcppéÿÿÿÿZ$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   Z!TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   NÚsimdlenZ TORCHINDUCTOR_CPP_MIN_CHUNK_SIZEZ4096ÚCXXÚdarwinzclang++zg++ÚcxxZ'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILEZ TORCHINDUCTOR_CPP_WEIGHT_PREPACKÚinject_relu_bug_TESTING_ONLYÚinject_log1p_bug_TESTING_ONLYr   Ú
vec_isa_okÚoriginal_aten©r   r¦   Zinductor_nodeÚdescriptive_namesZ,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZEZ16Z-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUMZ-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAGZ5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGÚoffZ)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICFZ#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICESZ%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKINGZ%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORSTZ$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH))ru   rv   rw   Úthreadsr   r   r   Zno_redundant_loopsZdynamic_threadsrŸ   r   rz   ry   Zmin_chunk_sizer‚   rƒ   r¢   Útupler   ÚstrZenable_kernel_profileZweight_prepackr£   r¤   r
   r¥   rx   r¨   Zmax_horizontal_fusion_sizeZfallback_scatter_reduce_sumZenable_unsafe_math_opt_flagZ#enable_floating_point_contract_flagZenable_tiling_heuristicsZenable_grouped_gemm_templateZgemm_max_k_slicesZgemm_cache_blockingZgemm_thread_factorsZenable_loop_tail_vecZenable_concat_linearZuse_decompose_tanhZuse_small_dequant_bufferr   r   r   r   r   Ð  sL   
ÿþÿÿÿÿÿÿÿÿr   c                   @   sè  e Zd ZU ej d¡dkZdZdZdZ	e
eeeeedf f   ed< dZdZeƒ rXdndZdZd	Ze
e ed
< dZdZdZdZdZdZdZej deƒ sždnd¡dkZeed< dZe
e ed< dZeed< dZ dZ!dZ"e
e ed< dZ#eed< dZ$eed< dZ%dZ&ej dd¡dkZ'ej dd¡dkZ(dZ)e*d ed< ej dd¡dkZ+ej dd¡dkZ,dZ-eej dd¡ƒZ.e*d ed< ej dd¡dkZ/dZ0dZ1dZ2eed < dZ3dZ4e
e5 ed!< dZ6ej d"d¡dkZ7ej d#d¡dkZ8dZ9dS )$rG   ZTORCHINDUCTOR_CUDAGRAPHSr   TFN.Úcudagraph_capture_sizesr‘   é2   Ú"cudagraph_dynamic_shape_warn_limitZ&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   Úcoalesce_tiling_analysisÚ	max_tilesÚprefer_nd_tilingÚautotune_at_compile_timeÚautotune_with_sample_inputsÚtile_reductionsZ!TORCHINDUCTOR_UNIQUE_KERNEL_NAMESZ&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr¦   r§   r¨   Z#TORCHINDUCTOR_PERSISTENT_REDUCTIONSZ$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSZTORCHINDUCTOR_MULTI_KERNEL)r   r   é   é   Úmulti_kernelZTORCHINDUCTOR_DIVISIBLE_BY_16é   é   Úspill_thresholdr£   ZENABLE_PERSISTENT_TMA_MATMULZTORCHINDUCTOR_SKIP_L1):ru   rv   rw   r   r   r   Z
cudagraphsZcudagraph_treesZcudagraph_skip_dynamic_graphsr­   r   r«   r   rz   ry   Zslow_path_cudagraph_assertsZ!cudagraph_trees_history_recordingr   Z cudagraph_support_input_mutationZ#cudagraph_unexpected_rerecord_limitr¯   Zforce_cudagraph_syncZforce_cudagraphs_warmupZfast_path_cudagraph_assertsZskip_cudagraph_warmupZdebug_sync_graphZdebug_sync_kernelZdense_indexingr°   rx   r±   r²   Zautotune_pointwiseZautotune_cublasLtr³   r´   rµ   Z tiling_prevents_pointwise_fusionZ tiling_prevents_reduction_fusionZunique_kernel_namesZunique_user_kernel_namesr¨   r   Zpersistent_reductionsZcooperative_reductionsZforce_cooperative_reductionsr¸   Zdivisible_by_16Zmin_split_scan_rblockZstore_cubinr»   Zuse_block_ptrr£   r¬   Zcodegen_upcast_to_fp32Zenable_persistent_tma_matmulZskip_l1_cacheZ.disallow_failing_autotune_kernels_TESTING_ONLYr   r   r   r   rG   F  sp   
$ÿýÿ
ÿ
ÿ	ÿÿÿÿÿrG   c                   @   s¤  e Zd ZU dZdZej dd¡dkZej dd¡Z	ej dd¡Z
ed	 ed
< ej dd¡ZdZdZdZeed< dZeed< dZeed< dZeed< i Zeeef ed< ej dd¡dkZeed< ej dd¡dkZeed< eej dd¡ƒZeed< i Zeeef ed< dZeed< dZeed< dZ eed< dZ!eed< e"ƒ  Z#eed < dZ$eed!< dZ%eed"< dZ&e'e ed#< i Z(ee)j*j+e,e f ed$< dZ-e'e,e  ed%< dS )&Úaot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    rR   ZAOT_INDUCTOR_DEBUG_COMPILEr   r   Z&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELZO1Z-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   Ú2Ú3Ú debug_intermediate_value_printerZ&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFÚuse_runtime_constant_foldingÚforce_mmap_weightsÚpackageÚpackage_cpp_onlyÚmetadataZ/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATIONÚ#raise_error_on_ignored_optimizationZDUMP_AOTI_MINIFIERÚdump_aoti_minifierZAOTINDUCTOR_REPRO_LEVELr¶   Úrepro_levelÚpresetsÚallow_stack_allocationÚuse_minimal_arrayref_interfaceTÚpackage_constants_in_soÚpackage_constants_on_diskÚprecompile_headersÚembed_kernel_binaryÚemit_multi_arch_kernelÚmodel_name_for_generated_filesÚcustom_ops_to_c_shimsÚcustom_op_libs).ru   rv   rw   Ú__doc__Zoutput_pathr   r   r   Zdebug_compileZcompile_wrapper_opt_levelr¿   r   ry   Zfiltered_kernel_namesZserialized_in_specZserialized_out_specrÀ   rx   rÁ   rÂ   rÃ   rÄ   Údictr¬   rÅ   rÆ   rz   rÇ   rÈ   r   rÉ   rÊ   rË   rÌ   r   rÍ   rÎ   rÏ   rÐ   r   rÑ   r   Z_opsZ
OpOverloadÚlistrÒ   r   r   r   r   r¼     sD   

ÿ	ÿÿÿ
r¼   c                
   @   s¸  e Zd ZU dZdZee ed< dZee ed< dZ	e
d ed< dZdZdZdZej d	ej ej ej ej¡d
¡¡¡ZdZee ed< g d¢Zee ed< ej dd¡dkZdZdZee ed< dZ eed< ej dd¡dkZ!e"ed< ej d¡Z#ee ed< ej d¡Z$ee ed< ej dd¡Z%eed< ej d¡Z&ee ed< ej dd¡dkZ'e"ed< ej d d¡dkZ(e"ed!< ej d"d#¡Z)eed$< d%Z*e"ed&< dZ+e"ed'< dZ,e"ed(< dS ))Úcudaz9Settings for cuda backend, today this consists of cutlassNÚarchr    ú-O1)ú-O0rØ   ú-O2ú-O3z-OSÚcompile_opt_levelFZTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Úcutlass_max_profiling_configs)r   r¶   r_   ra   Ú%cutlass_max_profiling_swizzle_optionsZCUTLASS_EPILOGUE_FUSIONr   r   Úcuda_cxxr   Úcutlass_backend_min_gemm_sizeZ/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEÚgenerate_test_runnerZTORCHINDUCTOR_CUTLASS_ALLOWLISTÚcutlass_op_allowlist_regexZTORCHINDUCTOR_CUTLASS_DENYLISTÚcutlass_op_denylist_regexZ)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELÚcutlass_instantiation_levelZTORCHINDUCTOR_CUTLASS_PRESETSÚcutlass_presetsZ+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDÚcutlass_hash_with_compile_cmdZ"TORCHINDUCTOR_CUTLASS_PRESCREENINGÚcutlass_prescreeningZ!TORCHINDUCTOR_CUTLASS_ENABLED_OPSÚallÚcutlass_enabled_opsTÚuse_binary_remote_cacheÚupload_to_binary_remote_cacheÚbinary_remote_cache_force_write)-ru   rv   rw   rÓ   r×   r   r¬   ry   r    rÜ   r   Zenable_cuda_ltoZenable_ptxas_infoZenable_debug_infoÚuse_fast_mathr   r   r   ÚpathÚabspathÚjoinÚdirnamer   Ú__file__Zcutlass_dirrÝ   rz   rÞ   rÕ   Zcutlass_epilogue_fusion_enabledZcutlass_tma_onlyrß   rà   rá   rx   râ   rã   rä   rå   ræ   rç   ré   rê   rë   rì   r   r   r   r   rÖ     sV   
ÿþ
ÿÿ
ÿÿ
ÿÿ
ÿ
ÿrÖ   c                   @   sè   e Zd ZU g Zee ed< ddgZee ed< dZe	d ed< dZ
dZd	Zd	ZdZd
Zee ed< ej d¡Zej dd¡dkZeed< d
Zee ed< d
Zee ed< d
Zee ed< dZeed< d
Zeee  ed< dZeed< d
S )Úrocmr×   Zgfx90aZgfx942Úck_supported_archrÚ   )	rÙ   rØ   rÚ   rÛ   z-Osz-Ozz-Ominz-Ofastz-OmaxrÜ   FTNÚ	rocm_homeZTORCHINDUCTOR_CK_DIRZ-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   rá   Ún_max_profiling_configsÚck_max_profiling_configsÚck_tile_max_profiling_configsÚuse_preselected_instancesÚkBatch_sweeprº   Úsplit_k_threshold)ru   rv   rw   r×   rÕ   r¬   ry   rô   rÜ   r   Zis_debugZ
save_tempsrí   Zflush_denormalsZprint_kernel_resource_usagerõ   r   r   r   r   Zck_dirrá   rx   rö   rz   r÷   rø   rù   rú   rû   r   r   r   r   ró     s*   
þÿÿ
ró   )r   rG   ÚhalideÚcpu_backend)rG   rü   Úcuda_backendc                   @   sB   e Zd ZU dZdZdZed ed< dZed ed< dZ	dZ
dZd	S )
rü   Úhostz	host-cudaÚAnderson2021)r   ZLi2018Ú	Adams2019ZMullapudi2016Úscheduler_cudar  Úscheduler_cpuFN)ru   rv   rw   Z
cpu_targetZ
gpu_targetr  r   ry   r  ZassertsÚdebugZscan_kernelsr   r   r   r   rü   N  s   
ÿÿrü   c                   @   sÚ   e Zd ZU ej dd¡dkZej dd¡dkZdZe	e
 ed< dZdZdZdZdZdZdZej d	d¡dkZej d
d¡dkZej dd¡Zej dd¡ZdZdZe	ee
gdf  ed< ej dd¡dkZdZeed< dS )ÚtraceZTORCH_COMPILE_DEBUGr   r   ZTORCH_COMPILE_DEBUG_SAVE_REALNÚ	debug_dirFTZINDUCTOR_POST_FUSION_SVGZINDUCTOR_ORIG_FX_SVGZINDUCTOR_DOT_GRAPH_SHAPE_SVGZ INDUCTOR_LOG_URL_FOR_GRAPH_XFORMÚ
upload_tarZLOG_AUTOTUNE_RESULTSÚ1log_inductor_triton_kernel_to_post_grad_node_info)ru   rv   rw   r   r   r   ZenabledZsave_real_tensorsr  r   r¬   ry   Z	debug_logZinfo_logZfx_graphZfx_graph_transformedZir_pre_fusionZir_post_fusionZoutput_codeZgraph_diagramZdraw_orig_fx_graphZdot_graph_shapeZlog_url_for_graph_xformZcompile_profiler  r   Zlog_autotuning_resultsr  rx   r   r   r   r   r  j  s$   

r  )
ztrace.upload_tarr>   r?   r@   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierr<   r=   rp   rA   Ú_save_config_ignore)r  zcuda.cutlass_dirrn   r‰   r=   r<   rp   rA   rš   r(   r)   r,   r-   Ú_cache_config_ignore_prefixÚexternal_matmulc                   @   sV   e Zd ZU dZeed< dZee ed< dZ	dZ
dZee ed< dZee ed< dZdS )Útest_configsFÚ%force_extern_kernel_in_multi_templateNÚmax_mm_configsÚautotune_choice_name_regexÚautotune_choice_desc_regex)ru   rv   rw   r  rx   ry   r  r   rz   Zruntime_triton_dtype_assertZstatic_cpp_dtype_assertr  r¬   r  Z*graphsafe_rng_func_ignores_fallback_randomr   r   r   r   r  Õ  s   
r  )Ú*(  r   r‚   Útypingr   r   r   r   r   r   r   Z!torch._inductor.custom_graph_passZtorch._environmentr   Ztorch.utils._config_moduler	   r
   r   r   r   Zinplace_paddingZcan_inplace_pad_graph_inputrx   r   r   r   r   r   r!   r#   Zenable_auto_functionalized_v2r  Zdisable_progressZverbose_progressr$   rz   ry   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   rÔ   r¬   r2   r4   r5   r6   r7   Zonline_softmaxZdceZstatic_weight_shapesZsize_assertsZnan_assertsZscalar_assertsZalignment_assertsZpick_loop_ordersZinplace_buffersZallow_buffer_reuseZmemory_planningrí   Zbfloat16_atomic_adds_enabledr;   Zbenchmark_harnessZepilogue_fusionZprologue_fusionZepilogue_fusion_firstZpattern_matcherZb2b_gemm_passr<   rZ   Zcustom_graph_passZCustomGraphPassTyper=   r>   ZfxZGraphr?   r@   ÚgraphrA   rÕ   rB   Zsplit_cat_fx_passesZ efficient_conv_bn_eval_fx_passesZis_predispatchZgroup_fusionZbatch_fusionrC   rD   Zreorder_for_localityZdynamic_scale_rblockZforce_fuse_int_mm_with_mulZuse_mixed_mmrE   rH   Z reorder_for_compute_comm_overlaprI   rJ   Zreorder_for_peak_memoryZestimate_op_runtimeZintra_node_bwZinter_node_bwrL   Zmax_autotuneZmax_autotune_pointwiseZmax_autotune_gemmZdisable_decompose_krN   Zgraph_partitionZforce_same_precisionÚupperZmax_autotune_gemm_backendsZmax_autotune_conv_backendsrP   rQ   Zautotune_fallback_to_atenZunbacked_symint_fallbackZsearch_autotune_cacheZ	save_argsZautotune_in_subprocZ+max_autotune_subproc_result_timeout_secondsZ-max_autotune_subproc_graceful_timeout_secondsZ.max_autotune_subproc_terminate_timeout_secondsZautotune_multi_deviceZcoordinate_descent_tuningZ'coordinate_descent_check_all_directionsZ coordinate_descent_search_radiusr\   r^   rW   rT   rU   Zautoheuristic_log_pathr    ZhipZlayout_opt_defaultZlayout_optimizationZforce_layout_optimizationZkeep_output_strideZwarn_mix_layoutZrealize_reads_thresholdZrealize_opcount_thresholdZrealize_acc_reads_thresholdZfallback_randomZimplicit_fallbacksZ assume_unaligned_fallback_outputZaggressive_fusionrb   rc   Zenabled_metric_tablesrd   Zscore_fusion_memory_thresholdZbenchmark_epilogue_fusionZ max_epilogue_benchmarked_choicesZmax_fusion_sizeZ)max_fusion_buffer_group_pairwise_attemptsZmax_pointwise_cat_inputsZforce_pointwise_catZunroll_reductions_thresholdZcomment_originZconv_1x1_as_mmZsplit_reductionsZmin_num_splitZbenchmark_kernelZconstant_and_index_propagationZalways_keep_tensor_constantsZassert_indirect_indexingZcompute_all_boundsZcombo_kernelsZbenchmark_combo_kernelZcombo_kernels_autotuneZcombo_kernel_allow_mixed_sizesZ#combo_kernel_foreach_dynamic_shapesZjoint_graph_constant_foldingZdebug_index_assertsZemulate_precision_castsÚ__version__Zis_nightly_or_sourceZdeveloper_warningsZ"optimize_scatter_upon_const_tensorrh   ri   rm   rn   ro   Z_fuse_ddp_communicationZ_fuse_ddp_bucket_sizerp   rq   rr   r{   rŠ   r‰   r‹   rŒ   r   Zlibfb.pyr   Ú__package__Zget_dir_pathrî   rð   ÚreplaceÚseprŽ   Ú
ValueErrorÚImportErrorZkernel_name_max_opsZshape_paddingZcomprehensive_paddingZpad_channels_lastZdisable_padding_cpuZpadding_alignment_bytesZpadding_stride_thresholdZpad_outputsZbw_outputs_user_visibler’   Zpermute_fusionZprofiler_mark_wrapper_callZgenerate_intermediate_hooksZdebug_ir_tracebackZ_raise_error_for_testingZ_profile_varZprofile_bandwidthZprofile_bandwidth_regexr“   Z/profile_bandwidth_with_do_bench_using_profilingZdisable_cpp_codegenr”   r•   r–   r—   r˜   r™   rš   Zenable_linear_binary_foldingr›   rœ   r   rG   r¼   rÖ   ró   rý   rþ   rü   r  r	  r
  r  ZTensorr  Ztorch.utils._config_typingÚmodulesru   r   r   r   r   Ú<module>   sŒ  
 ÿý
ÿý
ýþÿÿ
ÿÿÿ 
ûÿÿÿûÿÿÿüøÿÿÿÿÿýÿÿÿÿÿÿÿÿÿÿÿÿ
ÿÿ	ÿýþ%ýÿ
ÿÿÿÿÿv Gu CC$
