
    fTh:]                        S SK r S SKJrJrJr  S SKrS SKJr  S SKJs  J	r
  SSKJr  SSKJr  SSKJrJr  SSKJr  SSKJr  S	S
KJrJrJrJrJrJrJrJrJr  SSK J!r!  \RD                  " \#5      r$ " S S\5      r% " S S\5      r& " S S\RN                  5      r( " S S\5      r) " S S\RN                  5      r* " S S\RN                  5      r+ " S S\RN                  5      r, " S S\RN                  5      r- " S S\RN                  5      r. " S S \\5      r/\r0 " S! S"\5      r1 " S# S$\5      r2 " S% S&\5      r3 " S' S(\5      r4 " S) S*\5      r5/ S+Qr6g),    N)OptionalTupleUnion   )is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutputWav2Vec2BaseModelOutput)PreTrainedModel)logging   )	Wav2Vec2FeatureProjectionWav2Vec2FeedForward#Wav2Vec2ForAudioFrameClassificationWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ForXVectorWav2Vec2ModelWav2Vec2PositionalConvEmbeddingWav2Vec2PreTrainedModel   )WavLMConfigc                       \ rS rSrSrg)WavLMPositionalConvEmbedding    N__name__
__module____qualname____firstlineno____static_attributes__r       _/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/wavlm/modular_wavlm.pyr   r          r#   r   c                       \ rS rSrSrg)WavLMFeatureProjection"   r   Nr   r   r#   r$   r'   r'   "   r%   r#   r'   c                   (  ^  \ rS rSrSr    SS\S\S\S\S\S\4U 4S	 jjjr    SS
\	R                  S\\	R                     S\\	R                     S\S\\	R                  \\	R                     \\\	R                        4   4
S jjrS
\	R                  S\\	R                   \	R"                  4   S\	R                  S\S\	R                  \	R                  44
S jrS\S\S\	R                  4S jrS\	R                  S\	R                  4S jrSrU =r$ )WavLMAttention&   z=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsdropoutnum_bucketsmax_distancehas_relative_position_biasc                   > [         TU ]  5         Xl        X l        X0l        X-  U l        U R
                  U-  U R                  :w  a  [        SU R                   SU S35      eU R
                  S-  U l        [        R                  " X5      U l
        [        R                  " X5      U l        [        R                  " X5      U l        [        R                  " X5      U l        X@l        XPl        [        R                   " ["        R$                  " SU R                  SS5      5      U l        [        R                  " U R
                  S5      U l        U(       a1  [        R*                  " U R                  U R                  5      U l        g g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      r      )super__init__r,   r-   r.   head_dim
ValueErrorscalingnnLineark_projv_projq_projout_projr/   r0   	Parametertorchonesgru_rel_pos_constgru_rel_pos_linear	Embeddingrel_attn_embed)selfr,   r-   r.   r/   r0   r1   	__class__s          r$   r5   WavLMAttention.__init__)   s#    	""!.MMI%$..8MdnnM]$YKr3  }}d*ii	5ii	5ii	5		)7&(!#ejjDNNAq.Q!R"$))DMM1"=%"$,,t/?/?"PD &r#   hidden_statesattention_maskposition_biasoutput_attentionsreturnc                    UR                  5       u  pgnUcP  U R                  Xw5      nUR                  S5      R                  USSS5      R	                  X`R
                  -  Xw5      nUR	                  UR                  SS U R
                  S4-   5      n	U	R                  SSSS5      n	U R                  U	5      n
U
R	                  U	R                  SS S-   5      R                  S5      n
[        R                  " U
5      R                  SSS9u  pXU R                  -  S	-
  -  S
-   nUR	                  X`R
                  -  SS5      U-  nUR	                  SXw45      nU R                  XX5      u  nnUUU4$ )z'Attention layer with relative attentionNr   r   r   r   )r      dim      ?g       @)sizecompute_bias	unsqueezerepeatviewr-   shapepermuterC   sumr@   sigmoidchunkrB   torch_multi_head_self_attention)rF   rI   rJ   rK   rL   indexbsztgt_len_gated_hidden_statesrelative_position_projgate_agate_bgate_outputgated_position_biasattn_outputattn_weightss                    r$   forwardWavLMAttention.forwardM   s    (,,.a   --g?M''*11#q!Q?DDS>>EY[bl  ,001D1DSb1IT^^]_L`1`a199!Q1E "&!8!89L!M!7!<!<=P=V=VWZXZ=[^d=d!e!i!ijl!m '=>DDQBDO)?)? ?# EFL *..s^^/CRKm[166G7MN$($H$H+>%
!\ L-77r#   rh   c                 T   UR                  SS5      =n=pgUb  UR                  S5      OSnS=pSn[        R                  " UUUU R                  U R
                  [        R                  " S/5      [        R                  " U R                  R                  U R                  R                  U R                  R                  45      U	U
UU R                  U R                  R                  U R                  R                  U R                   UUUSU R                  R                  U R                  R                  U R                  R                  S9u  pUR                  SS5      nUbC  USS2S4   R#                  UR$                  SS U R
                  4-   UR$                  SS -   5      nX4$ )zCsimple wrapper around torch's multi_head_attention_forward functionr   r   NFT)use_separate_proj_weightq_proj_weightk_proj_weightv_proj_weight)	transposeneFmulti_head_attention_forwardr,   r-   r@   emptycatr=   biasr;   r<   r.   r>   weighttrainingbroadcast_torY   )rF   rI   rJ   rh   rL   querykeyvaluekey_padding_maskbias_kbias_vadd_zero_attnri   rj   s                 r$   r^   .WavLMAttention.torch_multi_head_self_attentionv   s    ,55a;;;3A3M>,,Q/SW  %&$B$BNNNNKKIIt{{'')9)94;;;K;KLMLLMM  MMMM%)++,,++,,++,,+%
!2 "++Aq1# (40==""2A&$..)::\=O=OPQPR=SSL ((r#   query_length
key_lengthc                 ~   [         R                  " U[         R                  S9S S 2S 4   n[         R                  " U[         R                  S9S S S 24   nXC-
  nU R                  U5      nUR	                  U R
                  R                  R                  5      nU R                  U5      nUR                  / SQ5      nU$ )N)dtype)r   r   r   )	r@   arangelong_relative_positions_buckettorE   ry   devicerZ   )rF   r   r   context_positionmemory_positionrelative_positionrelative_position_bucketvaluess           r$   rU   WavLMAttention.compute_bias   s     <<EJJG4P,,zDT1WM+>#'#B#BCT#U #;#>#>t?R?R?Y?Y?`?`#a $$%=>	*r#   relative_positionsc                 &   U R                   S-  nUS:  R                  [        R                  5      U-  n[        R                  " U5      nUS-  nX:  n[        R
                  " UR                  5       U-  5      nU[        R
                  " U R                  U-  5      -  nXbU-
  -  nXF-   R                  [        R                  5      n[        R                  " U[        R                  " XrS-
  5      5      nU[        R                  " XQU5      -  nU$ )Nr   r   r   )r/   r   r@   r   abslogfloatmathr0   min	full_likewhere)rF   r   r/   relative_buckets	max_exactis_smallrelative_positions_if_largerelative_position_if_larges           r$   r   )WavLMAttention._relative_positions_bucket   s    &&!+.266uzzB[P"YY'9:1$	%1&+ii0B0H0H0JY0V&W#&ADHHTM^M^ajMjDk&k#&AS\E\&]#&/&M%Q%QRWR\R\%]"%*YY&8RbcTc(d&
" 	EKKF`aar#   )r.   r,   rB   rC   r6   r;   r0   r/   r-   r>   r=   rE   r8   r<   )        i@  i   TNNFr   )r   r   r    r!   __doc__intr   boolr5   r@   Tensorr   r   rk   FloatTensorr   
LongTensor
BoolTensorr^   rU   r   r"   __classcell__rG   s   @r$   r*   r*   &   s   G +/"Q"Q "Q 	"Q
 "Q "Q %)"Q "QN 2604"''8||'8 !.'8  -	'8
  '8 
u||Xell3XeELL>Q5RR	S'8R5)((5) e..0@0@@A5) #..	5)
  5) 

U..	/5)n # %BSBS  U=N=N  SXSdSd    r#   r*   c                       \ rS rSrSrg)WavLMFeedForward   r   Nr   r   r#   r$   r   r      r%   r#   r   c                   B   ^  \ rS rSrSS\S\4U 4S jjjrSS jrSrU =r	$ )	WavLMEncoderLayer   configr1   c           	        > [         TU ]  5         [        UR                  UR                  UR
                  UR                  UR                  US9U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  S9U l        [!        U5      U l        [        R                  " UR                  UR                  S9U l        g N)r,   r-   r.   r/   r0   r1   epsr4   r5   r*   hidden_sizenum_attention_headsattention_dropoutr/   max_bucket_distance	attentionr9   Dropouthidden_dropoutr.   	LayerNormlayer_norm_eps
layer_normr   feed_forwardfinal_layer_normrF   r   r1   rG   s      r$   r5   WavLMEncoderLayer.__init__       '((00,,**33'A
 zz&"7"78,,v'9'9v?T?TU,V4 "V-?-?VEZEZ [r#   c                     UnU R                  UUUUUS9u  pnU R                  U5      nXa-   nU R                  U5      nXR                  U5      -   nU R	                  U5      nX4nU(       a  X4-  nU$ )NrJ   rK   rL   r_   )r   r.   r   r   r   )	rF   rI   rJ   rK   rL   r_   attn_residualrj   outputss	            r$   rk   WavLMEncoderLayer.forward   s    %59^^)'/ 6D 6
2] ]3%56%(9(9-(HH--m< 0&Gr#   r   r.   r   r   r   Tr   
r   r   r    r!   r   r   r5   rk   r"   r   r   s   @r$   r   r      s)    \{ \ \ \ r#   r   c                   B   ^  \ rS rSrSS\S\4U 4S jjjrSS jrSrU =r	$ )	 WavLMEncoderLayerStableLayerNorm   r   r1   c           	        > [         TU ]  5         [        UR                  UR                  UR
                  UR                  UR                  US9U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  S9U l        [!        U5      U l        [        R                  " UR                  UR                  S9U l        g r   r   r   s      r$   r5   )WavLMEncoderLayerStableLayerNorm.__init__   r   r#   c                     UnU R                  U5      nU R                  UUUUS9u  pnU R                  U5      nXQ-   nXR                  U R	                  U5      5      -   nX4nU(       a  Xv4-  nU$ )N)rJ   rK   rL   )r   r   r.   r   r   )rF   rI   rJ   rK   rL   r   rj   r   s           r$   rk   (WavLMEncoderLayerStableLayerNorm.forward	  s    %659^^)'/	 6D 6
2] ]3%5%(9(9$:O:OP]:^(__ 0&Gr#   r   r   )NNFr   r   s   @r$   r   r      s)    \{ \ \ \ r#   r   c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )WavLMEncoderi  c           
        > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[!        XS:H  S9PM     sn5      U l        SU l        g s  snf Nr   r   )r1   F)r4   r5   r   r   pos_conv_embedr9   r   r   r   r   r   r   r.   
ModuleListrangenum_hidden_layersr   layersgradient_checkpointingrF   r   irG   s      r$   r5   WavLMEncoder.__init__  s    :6B,,v'9'9v?T?TUzz&"7"78mmUZ[a[s[sUtuUtPQv6KUtu
 ',# v    C
c           	         U(       a  SOS nU(       a  SOS nUb4  UR                  S5      R                  SSUR                  S   5      nSX) '   U R                  U5      n	X-   nU R	                  U5      nU R                  U5      n[        5       =(       d    [        U 5      n
S n[        U R                  5       H  u  pU(       a  Xa4-   n[        R                  " / 5      nU R                  =(       a$    US:  =(       a    XR                  R                  :  nU(       a  U
(       aS  U R                  (       a1  U R                  (       a   U R!                  UR"                  UUUU5      nO
U" UUUUUS9nUS S u  pU(       a  SnU(       d  M  UWS   4-   nM     U(       a  Xa4-   nU(       d  [%        S XU4 5       5      $ ['        UUUS	9$ )
Nr   rO   r   r   r   r   NNNc              3   .   #    U  H  oc  M  Uv   M     g 7fNr   .0vs     r$   	<genexpr>'WavLMEncoder.forward.<locals>.<genexpr>i       m$[q$[   	last_hidden_staterI   
attentions)rV   rW   rY   r   r   r.   r   r   	enumerater   r@   randrz   r   	layerdropr   _gradient_checkpointing_func__call__tupler	   rF   rI   rJ   rL   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskposition_embeddingssynced_gpusrK   r   layerdropout_probabilityskip_the_layerlayer_outputss                    r$   rk   WavLMEncoder.forward*  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M01"11-@%;6]302R6LT6R!$++.HA#$58H$H! #(**R.!]]fq1uf:MP[P[PePe:eN![..4==$($E$E%&%)%M %*%'5&3*;%M 0=Ra/@, 2  &9]1=M<O&O#C /F   14D Dm]GZ$[mmm++*
 	
r#   r   r.   r   r   r   r   NFFTr   r   r    r!   r5   rk   r"   r   r   s   @r$   r   r     s"    	, "D
 D
r#   r   c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )WavLMEncoderStableLayerNormiq  c           
        > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[!        XS:H  S9PM     sn5      U l        SU l        g s  snf r   )r4   r5   r   r   r   r9   r   r   r   r   r   r   r.   r   r   r   r   r   r   r   s      r$   r5   $WavLMEncoderStableLayerNorm.__init__r  s    :6B,,v'9'9v?T?TUzz&"7"78mm v7788A 1Z[U[]8
 ',#r   c                    U(       a  SOS nU(       a  SOS nUb4  UR                  S5      R                  SSUR                  S   5      nSX) '   U R                  U5      n	X-   nU R	                  U5      n[        5       =(       d    [        U 5      n
S n[        U R                  5       H  u  pU(       a  Xa4-   n[        R                  " / 5      nU R                  =(       a$    US:  =(       a    XR                  R                  :  nU(       a  U
(       aR  U R                  (       a1  U R                  (       a   U R                  UR                   UUUU5      nO	U" UUUUS9nUS S u  pU(       a  SnU(       d  M  UWS   4-   nM     U R#                  U5      nU(       a  Xa4-   nU(       d  [%        S XU4 5       5      $ ['        XUS	9$ )
Nr   rO   r   r   r   )rJ   rL   rK   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r   r   s     r$   r   6WavLMEncoderStableLayerNorm.forward.<locals>.<genexpr>  r   r   r   )rV   rW   rY   r   r.   r   r   r   r   r@   r   rz   r   r   r   r   r   r   r   r	   r   s                    r$   rk   #WavLMEncoderStableLayerNorm.forward  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M01"11-@%;]302R6LT6R!$++.HA#$58H$H! #(**R.!]]fq1uf:MP[P[PePe:eN![ ..4==$($E$E%&%)%M %*%'5*;&3	%M 0=Ra/@, 2  &9]1=M<O&O#A /D 6 14D Dm]GZ$[mmm+Yl
 	
r#   r  r  r  r   s   @r$   r
  r
  q  s"    ," "B
 B
r#   r
  c                   B   ^  \ rS rSrSrU 4S jr\S 5       rS rSr	U =r
$ )WavLMGumbelVectorQuantizeri  z
Vector quantization using gumbel softmax. See [CATEGORICAL REPARAMETERIZATION WITH
GUMBEL-SOFTMAX](https://arxiv.org/pdf/1611.01144.pdf) for more information.
c                 8  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U R                  -  S:w  a&  [        SUR                   SU R                   S35      e[        R                  " [        R                  " SU R                  U R
                  -  UR                  U R                  -  5      5      U l        [        R                  " UR                  S   U R                  U R
                  -  5      U l        SU l        g )Nr   z`config.codevector_dim z5 must be divisible by `config.num_codevector_groups` z for concatenation.r   rO   r   )r4   r5   num_codevector_groups
num_groupsnum_codevectors_per_groupnum_varscodevector_dimr7   r9   r?   r@   r   codevectorsr:   conv_dimweight_projtemperature)rF   r   rG   s     r$   r5   #WavLMGumbelVectorQuantizer.__init__  s     6688  4??2a7)&*?*?)@ A66:oo5F G%%  <<a4==!@&BWBW[_[j[jBjk
 99V__R%8$//DMM:YZ r#   c           	          U R                  SS9n[        R                  " [        R                  " U[        R                  " US-   5      -  SS9* 5      R                  5       nU$ )Nr   rQ   gHz>rO   )meanr@   expr[   r   )probsmarginal_probs
perplexitys      r$   _compute_perplexity.WavLMGumbelVectorQuantizer._compute_perplexity  sR    *YY		.599^VZEZ;[*[ac ddeiik
r#   c                    UR                   u  p#nU R                  U5      nUR                  X#-  U R                  -  S5      nU R                  (       a  [
        R                  R                  UR                  5       U R                  SS9nUR                  U5      n[        R                  " UR                  X#-  U R                  S5      R                  5       SS9nU R                  U5      nOyUR                  SS9nUR                  " UR                   6 R!                  SUR                  SS5      S5      nUR                  X#-  U R                  S5      nU R                  U5      nUR                  X#-  S5      nUR#                  S5      U R$                  -  n	U	R                  X#-  U R                  U R&                  S5      n
U
R)                  S5      R                  X#S5      n
X4$ )NrO   T)tauhardrQ   r   rS   )rY   r  rX   r  rz   r9   
functionalgumbel_softmaxr   r  type_asr@   softmaxr$  argmax	new_zerosscatter_rV   r  r  r[   )rF   rI   
batch_sizesequence_lengthr   codevector_probscodevector_soft_distr#  codevector_idxcodevectors_per_groupr  s              r$   rk   "WavLMGumbelVectorQuantizer.forward  s   3@3F3F0
[ ((7%**:+G$//+Y[]^==!}};;M<O<O<QW[WgWgnr;s/77F $)=="":#?RTU[[]ce$  112FGJ +11b19N,668K8KLUUN''A.   044Z5QSWSbSbdfg112BCJ+001MrR 0 : :2 >AQAQ Q+001Mt`d`m`moqr!oob)..zBO&&r#   )r  r  r  r  r  )r   r   r    r!   r   r5   staticmethodr$  rk   r"   r   r   s   @r$   r  r    s+    
*  
"' "'r#   r  c                   D    \ rS rSr\rSrSrSrSr	Sr
S rS rS rS	 rS
rg)WavLMPreTrainedModeli  wavlminput_valuesTFc           
         [        U[        5      (       a  UR                  R                  R                  R                  SSS9  UR                  R                  R                  R                  5         [        R                  R                  UR                  5        g[        U[        5      (       a  [        R                  R                  UR                  R                  SS[        R                  " SUR                  R                   S   UR                  R"                  -  -  5      -  S9  [        R                  R%                  UR                  R                  S5        g[        U[&        5      (       a  [        R                  " SUR(                  R*                  -  5      n[        R                  R                  UR(                  R                  U* US9  [        R                  R                  UR(                  R                  U* US9  g[        U[        R,                  5      (       ak  UR                  R                  R                  SU R.                  R0                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R2                  [        R4                  45      (       aJ  UR                  R                  R                  5         UR                  R                  R7                  S5        g[        U[        R8                  5      (       a  [        R                  R;                  UR                  5        UR                  bh  [        R                  " UR<                  UR"                  UR                   S   -  -  5      n[        R                  R                  UR                  U* US9  ggg)	zInitialize the weightsr   r   )r  stdr   r   )abNrS   )
isinstancer  r  ry   datanormal_rx   zero_r9   inituniform_r  r   convr   sqrtkernel_sizein_channels	constant_r'   
projectionin_featuresr:   r   initializer_ranger   	GroupNormfill_Conv1dkaiming_normal_groups)rF   moduleks      r$   _init_weights"WavLMPreTrainedModel._init_weights  s    f899%%**222C##((..0GGV//0 <==GGOO""		!v{{'>'>q'AFKKD[D['["\]]  
 GGfkk..2 677		!f//;;;<AGGV..55!qAGGV..33rQ?		**MM&&CT[[5R5R&S{{&  &&( 'r|| <==KK""$MM$$S)		**GG##FMM2{{&IIfmmv/A/AFDVDVWXDY/YZ[  a 8 ' +r#   c                     [        S5      eNzNot needed for WavLMAttributeErrorrF   s    r$   _get_adapters"WavLMPreTrainedModel._get_adapters4      344r#   c                     [        S5      erY  rZ  r\  s    r$   init_adapter_layers(WavLMPreTrainedModel.init_adapter_layers7  r_  r#   c                     [        S5      erY  rZ  r\  s    r$   load_adapter!WavLMPreTrainedModel.load_adapter:  r_  r#   r   N)r   r   r    r!   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_2_supports_sdparV  r]  ra  rd  r"   r   r#   r$   r:  r:    s8    L$O&*#"N9B555r#   r:  c                       \ rS rSrSrg)
WavLMModeliA  r   Nr   r   r#   r$   rm  rm  A  r%   r#   rm  c                       \ rS rSrSrg)WavLMForCTCiE  r   Nr   r   r#   r$   ro  ro  E  r%   r#   ro  c                       \ rS rSrSrg)WavLMForSequenceClassificationiI  r   Nr   r   r#   r$   rq  rq  I  r%   r#   rq  c                       \ rS rSrSrg) WavLMForAudioFrameClassificationiM  r   Nr   r   r#   r$   rs  rs  M  r%   r#   rs  c                       \ rS rSrSrg)WavLMForXVectoriQ  r   Nr   r   r#   r$   ru  ru  Q  r%   r#   ru  )rs  ro  rq  ru  rm  r:  )7r   typingr   r   r   r@   torch.nnr9   torch.nn.functionalr*  rt   integrations.deepspeedr   integrations.fsdpr   modeling_outputsr	   r
   modeling_utilsr   utilsr   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   configuration_wavlmr   
get_loggerr   loggerr   r'   Moduler*   r   r   r   r   r
  r  r:  WavLMBaseModelOutputrm  ro  rq  rs  ru  __all__r   r#   r$   <module>r     sB    ) )     @ 7 H - 
 
 
 - 
		H	%	#B 		6 	c RYY c L	* 	&		 &R"ryy "JP
299 P
fQ
")) Q
hC' C'L05?,C 05f / 	 		. 		%F 		'J 		( 	r#   