
    [Th                     t   % S SK r S SKrS SKJrJrJrJrJrJr  S SK	r	S SK
r	S SKJr  S SKJrJrJr  \ R"                  R%                  SS5      S:H  rSrS\\   4S	 jrS\\   4S
 jrS\\   4S jrS\\   4S jrS\\   4S jrS\4S jr\ R"                  R%                  SS5      S:H  rSrSrSr\" SSSS9r \\!S'   \" 5       r"\\   \!S'   \" 5       r#\\   \!S'   Sr$\\!S'   \" 5       r%\\   \!S'   \" 5       r&\\   \!S'   \ R"                  R%                  S5      S:H  r'\\!S'   Sr(\\)   \!S'   Sr*\S   \!S'   Sr+\S   \!S '   \ R"                  R%                  S!S"5      S:H  r,\\!S#'   \ R"                  R%                  S$S5      S:H  r-Sr.Sr/\ R"                  R%                  S%S5      S:H  r0\ R"                  R%                  S&5      S:H  r1\ R"                  R%                  S'S5      S:H  r2Sr3Sr4Sr5\ R"                  R%                  S(S"5      S:H  r6\ R"                  R%                  S)5      S:H  r7\ R"                  R%                  S*S+5      r8\S,   \!S-'   Sr9Sr:\" 5       r;Sr<Sr=Sr>Sr?\	R                  R                  R                  \!S.'   SrC\	R                  R                  R                  \!S/'   SrD\\\	R                  R                  /S4      \!S0'   SrG\\\	R                  R                  /S4      \!S1'   SrH\\\	R                  R                  R                  /S4      \!S2'   SrJ\\\KS3   /\KS3   4      \!S4'   SrLSrMSrNSrOSrP0 rQ\R\S\R\S\4   4   \!S5'   0 rT\R\S\R\S\4   4   \!S6'   SrU\ R"                  R%                  S7S5      S:H  rVSrWSrXSS8S9SS:.rY\R\S\4   \!S;'   S<rZ\S=   \!S>'   Sr[/ S?Qr\\K\\S\\KS3   /\KS3   4   4      \!S@'   Sr]SAr^SBr_SCr`\" SSDSESF9ra\\!SG'   \ R"                  R%                  SH5      S:H  rb\ R"                  R%                  SI5      S:H  rc\ R"                  R%                  SJ5      S:H  rdSKre\\)   \!SL'   Srf\" 5       (       a  SO\ R"                  R%                  SM5      S:H  rg\ R"                  R%                  SNSO5      R                  5       ri\ R"                  R%                  SPSQ5      R                  5       rj\ R"                  R%                  SRSS5      R                  5       rk\ST   \!SU'   \ R"                  R%                  SVS5      S:H  rlSWrm\ R"                  R%                  SX5      S:H  rn\ R"                  R%                  SY5      S:H  ro\ R"                  R%                  SZ5      S:H  rpS[rqS\rrS]rs\ R"                  R%                  S^5      S:H  rt\ R"                  R%                  S_5      S:H  ru\ R"                  R%                  S`5      S:H  rv\)" \ R"                  R%                  SaS5      5      rw\ R"                  R%                  SbSc5      rx\ R"                  R%                  SdSe5      rySf\SS\4Sg jrzSf\SS\4Sh jr{Sf\SS\4Si jr|\ R"                  R%                  SjSS5      r}\	R                  R                  (       d  SOS"r\ R"                  R%                  Sk\5      S:H  r\ R"                  R%                  SlS"5      S:H  r\ R"                  R%                  SmS5      S:H  r\ R"                  R%                  Sn5      S:H  rSorSprSqrSrSrSr\ R"                  R%                  Sr5      S:H  r\\!Ss'   \ R"                  R%                  St5      S:H  r\\!Su'   \ R"                  R%                  SvSc5      r\ R"                  R%                  SwS"5      S:H  r\\!Sx'   SKr\ R"                  R%                  SyS5      S:H  rS9rSzrSqrSrSqrSrSrSr\ R"                  R%                  S{S"5      S:H  rSrSrSrSrSrSrS9rS9rSrSrSr\ R"                  R%                  S|S"5      S:H  rS}\	GRL                  ;   =(       d    S~\	GRL                  ;   r\" 5       =(       d    \r\ R"                  R%                  SS5      S:H  rSr\\S   \!S'   Sr\\S   \!S'   S\S4S jr\" 5       r\S\!S'   SrSCrSS/r\K\\S   \S4      \!S'   Sr\\!S'    " S S5      rS\4S jrS\)4S jr\" 5       (       a  SO\" 5       r\\)   \!S'   \\S   \!S'   \" 5       (       an   S SKJr  \(       aL  \GRr                  " \ GRt                  GRw                  \GRy                  S\ GRz                  5      S5      5      rO\GRr                  " S5      rOSrSKr\ R"                  R%                  SS5      S:H  r\ R"                  R%                  SS5      S:H  rSrSrSrSrSrSrSr\\!S'   \ R"                  R%                  SS"5      S:H  rSrSrSrSr\ R"                  R%                  SSc5      r\Sc:g  r\S:X  a  ScO\r\ R"                  R%                  SS5      r\\S   \!S'   \ R"                  R%                  S5      S:H  rSr\ R"                  R%                  SS"5      S:H  r\\!S'   Sr\\!S'   Sr\\!S'   Sr\\!S'   Sr\\!S'   Sr\\!S'   Sr\\!S'   \ R"                  R%                  SS"5      S:H  r\ R"                  R%                  SS"5      S:H  r\\!S'    " S S5      r " S S5      r " S S5      r " S S5      r " S S5      rSr\S   \!S'   Sr\S   \!S'    " S S5      r " S S5      r/ SQr\K\S   \!S'   / SQr\K\S   \!S'   / r\K\\	GR                  \	GR                  \	GR                  /S4      \!S'    " S S5      r\(       a  S SK7  \" \GR                  \   5        g! \\4 a    Sr GNYf = f)    N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                      [        S5      $ )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr        N/var/www/auris/envauris/lib/python3.13/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBr   c                      [         R                  R                  S5      S:X  a  g[         R                  R                  S5      S:X  a  gg )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr      s7    	zz~~01S8	zz~~01S8r   c                      [        S5      $ )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr!      r   r   c                      [        S5      $ )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr$      s    IJJr   c                  D    [        S[        5       (       d  S5      $ S 5      $ )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r   r	   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr'   #   s(    9KK %) r   c                      Sn S[         R                  ;   a"  [         R                  R                  S5      S:H  $ [        5       (       a%  Sn[        R
                  R                  U5      nX :*  $ g)Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionT)r   r   r   r	   torch_utils_internaljustknobs_getval_int)ENABLE_PROLOGUE_FUSION_VERSIONjk_nameversions      r   prologue_fusion_enabledr0   *   s[    %&"&"**4zz~~=>#EE	<''<<WE88r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2Tz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cacheautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cache"TORCHINDUCTOR_FORCE_DISABLE_CACHESforce_disable_cachessleep_sec_TESTING_ONLYneeds_fixed_stride_order)r?   flexible_layout#custom_op_default_layout_constraint'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapperTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerL   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r5   tritonatenr`   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesr5   i,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r5   r4   r3   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   autotune_num_choices_displayed"TORCHINDUCTOR_FORCE_SAME_PRECISION(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)rt   
EXHAUSTIVEmax_autotune_gemm_search_space'TORCHINDUCTOR_AUTOTUNE_FALLBACK_TO_ATENi    #TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHETORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g      ?g       @#TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmnamec                 <    [        U 5      =(       d    [        U 5      $ )N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &A*;D*AAr   c                 n    U [         R                  R                  R                  R	                  S5      ;   $ N,)r*   	_inductorconfigautoheuristic_collectsplitr   s    r   r   r     s(    5??))??EEcJJJr   c                 n    U [         R                  R                  R                  R	                  S5      ;   $ r   )r*   r   r   autoheuristic_user   r   s    r   r   r     s(    5??));;AA#FFFr   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT         TORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  z    S[         R                  ;   a  [         R                  S   n OSn U S;   d
   SU  35       eU $ )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr   o  sR    #rzz1zz">?#   / 
 ~.	/ 
 r   worker_start_methodfuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   2    \ rS rSr% Sr\\S'   Sr\\S'   Sr	g)_collectivei  Fauto_selecti   #one_shot_all_reduce_threshold_bytesr   N)
__name__
__module____qualname____firstlineno__r   bool__annotations__r   int__static_attributes__r   r   r   r   r     s    K/9'9r   r   c                  P    Sn Sn[         R                  R                  U5      nX:  $ )a  
TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
knob to enable / disable. The justknob should not be performed at import, however.
So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
async_compile.py.
rZ   z0pytorch/inductor:enable_parallel_compile_version)r*   r+   r,   )ENABLE_PARALLEL_COMPILE_VERSIONr.   r/   s      r   #parallel_compile_enabled_internallyr     s.     '(#@G##88AG*55r   c                  X   SSK n U R                  [        5      nS[        R                  ;   a0  [        [        R                  S   5      nUR                  SU5        U$ [        R                  S:X  a  SnUR                  S5        U$ [        5       (       a$  [        5       (       d  SnUR                  S5        U$ [        [        S	5      (       a  [        [        R                  " S5      5      O[        R                  " 5       nU(       d   e[        S
U5      nUR                  SU5        U$ )a	  
Here are the precedence to decide compile_threads
1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
   setting this to 1 to make pdb happy.
2. Set to 1 if it's win32 platform
3. decide by the number of CPU cores
r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rZ   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr	   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   s       r   decide_compile_threadsr     s     

H
%C&"**4bjj)HIJ4oF" ! 
	 56  
@BB56  r.// $$Q'( 	
 yb),,o>r   r   global_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_trainingc                   @   \ rS rSr% Sr\R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  r	Sr
\\   \S'   \" \R                  R                  S	S
5      5      rS\R                  R                  S\R                   S:X  a  SOS5      4r\\S   \4   \S'   \R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  rSr\\   \S'   Sr\\   \S'   \" S5      r\\   \S'   Sr\\\S   4   \S'   \" \R                  R                  SS5      5      r\R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  r \R                  R                  SS5      r!\R                  R                  SS5      S:H  r"S r#\" \R                  R                  S!S5      5      r$\R                  R                  S"S5      r%\R                  R                  S#S5      r&S$r'S r(S%r)g)&cppig  $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE4096CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr*   r   inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORSTr   )*r   r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr   r   r   r   min_chunk_sizer   r   r   tupler   strenable_kernel_profileweight_prepackr   r   r   r   r   r   r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearr   r   r   r   r   r   g  sB   G
 	

=sCsJ  jjnn%H#NRUUO!GXc]!(JFSTN 	


u3<<8+CiO&Cwt}c!	"  	

@#F#M 
 ZZ^^$FLPSSN
 37 (3-637!8C=7 "22L!MJM
 	 ug?@@ 
 "%


EtL" 	

FLPSS   	

FLPSS   +-**..?+' 	

BCHCO 
 $)  BJJNN+PRUVW **..)PRVW **..)PRVW   !r   r   c                      \ rS rSr% \R
                  R                  S5      S:H  rSrSr	Sr
Sr\" 5       (       a  SOSrSrSr\\   \S'   SrSrSrSrSrSrSrS	rSr\\S
'   SrSrSr\\   \S'   Sr \\S'   Sr!Sr"\R
                  R                  SS5      S:H  r#\R
                  R                  SS5      S:H  r$Sr%\&\\'S   4   \S'   \R
                  R                  SS5      S:H  r(\R
                  R                  SS5      S:H  r)Sr*\" \R
                  R                  SS5      5      r+\'S   \S'   Sr,Sr-Sr.Sr/\\S'   Sr0Sr1\\2   \S'   Sr3\R
                  R                  SS5      S:H  r4\R
                  R                  SS5      S:H  r5Sr6Sr7g) ra   i  TORCHINDUCTOR_CUDAGRAPHSr   TFr   2   "cudagraph_dynamic_shape_warn_limit   prefer_nd_tilingNautotune_at_compile_timetile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr   r   r   r   #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rZ   r"     multi_kernel      spill_thresholdr   ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1r   )8r   r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsslow_path_cudagraph_asserts!cudagraph_trees_history_recordingr	    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr!  r   r   r   force_cudagraph_syncforce_cudagraphs_warmupfast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexing	max_tilesr#  r   autotune_pointwiseautotune_cublasLtr$  r%   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr   r   r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr,  divisible_by_16min_split_scan_rblockstore_cubinr/  use_block_ptrr   r  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYr   r   r   r   ra   ra     s@    :;sBJ O %*! #' ).% 1:u$ +.' 9;&: ! $ #( "   N I #d"   04htn3 "OT! (,$'+$ 	

:C@CG  	

?EL  	 ug?@@  	

<cBcI  	

=sCsJ 
 $)  ),


3S9)L'*% 
 O   K OS M
 37 (3-6 "
 	

5s;sB ! JJNN#:C@CGM 6;2r   ra   c                      \ rS rSr% Sr\R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  r	\R                  R                  SS5      r
\S   \S	'   \R                  R                  S
S5      rSrSrSr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   0 r\\\4   \S'   \R                  R                  SS5      S:H  r\\S'   \R                  R                  SS5      S:H  r\\S'   \" \R                  R                  SS5      5      r\\S'   0 r\\\4   \S'   Sr\\S'   Sr\\S'   Sr \\S'   Sr!g)aot_inductori  r   AOT_INDUCTOR_DEBUG_COMPILEr   r   $AOT_INDUCTOR_COMPILE_WRAPPER_WITH_O0-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightspackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr"  repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interfaceTpackage_constants_in_sor   )"r   r   r   r   output_pathr   r   r   debug_compilecompile_wrapper_with_O0rY  r   r   filtered_kernel_namesserialized_in_specserialized_out_specr[  r   r\  r]  r^  r_  dictr  ra  rc  r   re  rf  r   rg  rh  ri  r   r   r   r   rS  rS    s~    KJJNN#?ELM
 	

=sCsJ  EGJJNN7E$g.@&A 
 JJNN0$   */ $.  %$GT"d"  "Hd38n! 	

H#NRUU ( 
  "zz~~.BCHCOO 2::>>*CQGHKH !GT#s(^  $)D( ,1"D0 %)T(r   rS  c            
          \ rS rSr% Sr\\   \S'   Sr\\   \S'   Sr	\
S   \S'   SrSrSrSr\R                   R#                  S	\R$                  R'                  \R$                  R)                  \R$                  R+                  \R.                  5      S
5      5      5      rSr\\   \S'   / SQr\\   \S'   Sr\\   \S'   Sr\\S'   \R                   R#                  SS5      S:H  r\ \S'   \R                   R#                  S5      r!\\   \S'   \R                   R#                  S5      r"\\   \S'   \R                   R#                  SS5      r#\\S'   Sr$g)cudai  Narchr/   -O1)-O0rt  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)rZ   r"  r   %cutlass_max_profiling_swizzle_optionscuda_cxxrZ   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   generate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_levelr   )%r   r   r   r   rs  r   r  r   r/   rx  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   r   r   pathabspathjoindirnamer*   __file__cutlass_dirrz  r   r{  listr|  r}  r  r   r  r  r  r   r   r   r   rr  rr    sn    D(3-
 "GXc]! EJw@AI O   M **..#
GGLL8:ST	
K 48!8C=7 8A)49@ #Hhsm" *+!3*
 	

H#NRUU $  13

)1  02zz~~(0x}  (*zz~~3S( r   rr  c                   F   \ rS rSr% / r\\   \S'   SS/r\\   \S'   Sr	\
S   \S'   S	rS	rS
rS
rS	rSr\\   \S'   \R&                  R)                  S5      r\R&                  R)                  SS5      S:H  r\\S'   Sr\\   \S'   S	r\\S'   Sr\\\      \S'   Sr\\S'   Srg)rocmi;  rs  gfx90agfx942ck_supported_archrv  )	ru  rt  rv  rw  z-Osz-Ozz-Ominz-Ofastz-Omaxrx  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsuse_preselected_instanceskBatch_sweepr.  split_k_thresholdr   )r   r   r   r   rs  r  r  r   r  rx  r   is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirr  r   r  r   r  r  r  r   r   r   r   r  r  ;  s     D$s) %-h#7tCy7 	 wL 
 H J M O #(  $Ix}# ZZ^^23F 	

FLPSS $ 
 .2Xc]1 ',t+ )-L(49%,  sr   r  )r   ra   halidecpu_backend)ra   r  cuda_backendc                   R    \ rS rSr% SrSrSr\S   \S'   Sr	\S   \S'   S	r
S	rS	rS
rg)r  i|  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFr   N)r   r   r   r   
cpu_target
gpu_targetr  r   r   r  assertsdebugscan_kernelsr   r   r   r   r  r  |  sQ    J J
 	 GRS  	 7QR 
 G E Lr   r  c                      \ rS rSr% \R
                  R                  SS5      S:H  r\R
                  R                  SS5      S:H  rSr	\
\   \S'   SrSrS	rS	rS	rS	rS	r\R
                  R                  S
S5      S:H  r\R
                  R                  SS5      S:H  r\R
                  R                  SS5      r\R
                  R                  SS5      rSrSr\
\\/S4      \S'   Sr\\S'   S	r\\S'   Srg)tracei  TORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarlog_autotuning_results1log_inductor_triton_kernel_to_post_grad_node_infor   )r   r   r   r   r   r   r   enabledsave_real_tensorsr  r   r  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   r  r   r  r   r   r   r   r  r    s!   jjnn2C8C?G 

'FLPSS  $Ix}# I H H   M N K JJNN#=sCsJM (>DK jjnn%CTJO !jjnn-OQUV O 37J3%+./6#(D( ?C5tBr   r  )ztrace.upload_tarrS   rT   rU   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifier_save_config_ignore)r  zcuda.cutlass_dirr   r   rR   rQ   r   _cache_config_ignore_prefixexternal_matmulc                   h    \ rS rSr% Sr\\S'   Sr\\	   \S'   Sr
Sr\\   \S'   Sr\\   \S'   SrSrg)	test_configsi  F%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regexr   )r   r   r   r   r  r   r   r  r   r   runtime_triton_dtype_assertr  r  r  *graphsafe_rng_func_ignores_fallback_randomr   r   r   r   r  r    sG    27)47$(NHSM("' 15404416.r   r  )*)r   r   typingr   r   r   r   r   r   r*   !torch._inductor.custom_graph_passtorch._environmentr	   torch.utils._config_moduler
   r   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r!   r$   r'   r0   enable_auto_functionalized_v2r  disable_progressverbose_progressr6   r   r7   r8   r9   r:   r;   r=   r>   r   rA   rB   rD   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  rP   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrQ   r   custom_graph_passCustomGraphPassTyperR   rS   fxGraphrT   rU   graphrV   r  split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrW   rp  r  rX   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmr_   rc    reorder_for_compute_comm_overlaprg   reorder_for_peak_memoryestimate_op_runtimeintra_node_bwinter_node_bwrj   max_autotunemax_autotune_pointwisemax_autotune_gemmro   graph_partitionforce_same_precisionuppermax_autotune_gemm_backendsmax_autotune_conv_backendsrv   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   r   autoheuristic_log_pathr/   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdfallback_randomimplicit_fallbacksaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r   r   libfb.pyr   __package__get_dir_pathr  r  replacesepr   
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastdisable_padding_cpupadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler   permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr   /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr   r   r   r   r   r   r   enable_linear_binary_foldingr   r   ra   rS  rr  r  r  r  r  r  r  r  r  Tensorr  torch.utils._config_typingmodulesr   r   r   r   <module>ra     sp   	 
 I I  ( ( V V **..!@#F#M# Cx~ CHTN Cx~ CKx~ K8D> 
 
 JJNN7=D 
 	     ?1  )F(G x~ G ./ "8D>  " d ! )F(G x~ G 1V0W x~ W  ZZ^^,PQUXX d X )-  ,  $W1&   (1*  JJNN#>DKT K >DK    zz~~:C@CGjjnn89S@ >DK      **..!@#F#M 

<=D HJzz~~HWCD 
    *+     TX 5??<<PP WTX EOO==QQ X EI x%((..)94)? @A HEI 588>>*:D*@!AB I
 JN hx)=)=(>(DEF M 	 	;	<=:;	= 	   $)     ( 68 c4S>12 7 79 $sDcN23 8   zz~~&JCPTWW  # 
  	+ c3h ( FQAB P $)   (	?@A>?A	
	*         &,?<& d  zz~~:;sB (NOSVV  JJNN#DEL  13  2  KKDRZZ^^,PQUXX   ZZ^^.0A%'   ZZ^^.%'  DF::>>2ID%' (? @ 
 JJNN<cBcI     

'LMQTT JJNN45<	 jjnn%HISP 
 /3 +03 -14 . 

'LMQTT  JJNN<=D  JJNNJKsR ( $'JJNN<cB$   

'LbQ JJNN#DjQ BC BD BK K KGC GD G *I 
 !& 1 1Ss JJNN68JKsR  JJNN+KSQUXX  ZZ^^$FLPSS  **..!@ASH
            ZZ^^$@ASHd H(HISP $ P

'LbQ JJNN=sCsJ D  !#  JJNN<cBcI 
 $%              ::>>"BCHCO  "&   %           "# &+ #  $    JJNN:C@CG 
  1 11OUe>O>O5O [8$8  JJNNEsKsR #
 &* Xc] )(,  ,
C 
 67 S 7      I U8I+>+C%D E 
 ! D  : :
6T 6! !J *3:P:R# R 3- ;;
 $&33[00bff=zJ   '33J?
    

<cBcI JJNN8#>#E       (          >DK # 
 $    ! zz~~5r: B&  , 3"  +-**.."D+ (3- 
 JJNNHISP 0    8#>#E$ E %* T )  %  $
 $ t # 8= . < ,1 "D 0 49 *D 8 JJNN?EL  **..)JCPTWW 4 Wi! i!Zp; p;fW) W)tW Wt7  7 v 38W./ 7 -5g() 4 8@C @CF" T#Y * T#Y   UWhellELLI4OPQ V7 7 , ckk(+ ,] $   s   .An) n) )
n76n7