
    fTh                       S SK r S SKJrJrJr  S SKrS SKrS SKJ	r	  S SKJ
r
  SSKJr  SSKJr  SSKJr  SSKJrJr  SS	KJrJrJr  SS
KJr  SSKJrJr  SSKJr  \" 5       (       a  SSKJr  \R@                  " \!5      r" " S S\	RF                  5      r$ " S S\	RF                  5      r% " S S\	RF                  5      r& " S S\	RF                  5      r' " S S\	RF                  5      r( " S S\	RF                  5      r) " S S\	RF                  5      r* " S S\	RF                  5      r+ " S S \+5      r, " S! S"\+5      r- " S# S$\	RF                  5      r.\+\-\,S%.r/ " S& S'\	RF                  5      r0 " S( S)\	RF                  5      r1 " S* S+\	RF                  5      r2 " S, S-\	RF                  5      r3 " S. S/\	RF                  5      r4\ " S0 S1\5      5       r5  SCS2\\6\64   S3\7S4\6S5\\Rp                     S6\6S7\Rr                  4S8 jjr:\ " S9 S:\55      5       r;Sr<\" S;S<9 " S= S>\55      5       r=\" S?S<9 " S@ SA\55      5       r>/ SBQr?g)D    N)OptionalTupleUnion)CrossEntropyLoss   )ACT2FN)is_deepspeed_zero3_enabled)is_fsdp_managed_module)!flash_attn_supports_top_left_maskis_flash_attn_available)BaseModelOutputCausalLMOutputSequenceClassifierOutput)PreTrainedModel)auto_docstringlogging   )HubertConfig)_flash_attention_forwardc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertPositionalConvEmbedding    c                 2  > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  S9U l        S U l        UR                  (       a'  [        R                  " UR                  5      U l        GO[        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR"                  R%                  U R                  R&                  SS9   U" U R                  SSS9U l        S S S 5        [        U R                  S5      (       aU  U R                  R                  R&                  R(                  nU R                  R                  R&                  R*                  nO,U R                  R,                  nU R                  R.                  nUR"                  R1                  X5        UR"                  R1                  X5        OU" U R                  SSS9U l        [3        UR
                  5      U l        [6        UR8                     U l        g ! , (       d  f       GN,= f)	N   )kernel_sizepaddinggroupsweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normconv_pos_batch_normBatchNorm1dutilsr   hasattrr$   r	   	deepspeedzeroGatheredParametersr!   	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr   r2   r7   r8   	__class__s         b/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/hubert/modeling_hubert.pyr&   &HubertPositionalConvEmbedding.__init__!   s   II6622a777
	 %% nnV-?-?@DO((..Krxx00-@@ hh77CC)++ ^^66tyy7G7GWX6Y +DIIH! LDI Z499&899#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@ ZYs   
J
Jc                     UR                  SS5      nU R                  b  U R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nUR                  SS5      nU$ Nr   r   )	transposer-   r,   r   r<   r=   hidden_statess     r@   forward%HubertPositionalConvEmbedding.forwardF   sn    %//15??& OOM:M		-0]36%//15    )r<   r-   r,   r   __name__
__module____qualname____firstlineno__r&   rG   __static_attributes____classcell__r?   s   @r@   r   r       s    #AJ	 	rI   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )r:   R   c                 R   > [         TU ]  5         US-  S:X  a  SU l        g SU l        g )Nr   r   r   )r%   r&   num_pad_remove)r=   r*   r?   s     r@   r&   HubertSamePadLayer.__init__S   s)    #:Q#>!#CarI   c                 X    U R                   S:  a  US S 2S S 2S U R                   * 24   nU$ Nr   rU   rE   s     r@   rG   HubertSamePadLayer.forwardW   s6    ")!Q0F43F3F2F0F*FGMrI   rY   rJ   rQ   s   @r@   r:   r:   R   s    K rI   r:   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )HubertNoLayerNormConvLayer]   c                 b  > [         TU ]  5         US:  a  UR                  US-
     OSU l        UR                  U   U l        [
        R                  " U R                  U R                  UR                  U   UR                  U   UR                  S9U l
        [        UR                     U l        g )Nr   r   r   stridebias)r%   r&   conv_dimin_conv_dimout_conv_dimr'   r(   conv_kernelconv_stride	conv_biasr,   r   r;   r<   r=   r>   layer_idr?   s      r@   r&   #HubertNoLayerNormConvLayer.__init__^   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 !!?!?@rI   c                 J    U R                  U5      nU R                  U5      nU$ N)r,   r<   rE   s     r@   rG   "HubertNoLayerNormConvLayer.forwardl   s$    		-06rI   )r<   r,   rc   rd   r   rJ   rQ   s   @r@   r\   r\   ]   s    A rI   r\   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )HubertLayerNormConvLayerr   c                   > [         TU ]  5         US:  a  UR                  US-
     OSU l        UR                  U   U l        [
        R                  " U R                  U R                  UR                  U   UR                  U   UR                  S9U l
        [
        R                  " U R                  SS9U l        [        UR                     U l        g )Nr   r   r_   T)elementwise_affine)r%   r&   rb   rc   rd   r'   r(   re   rf   rg   r,   	LayerNorm
layer_normr   r;   r<   rh   s      r@   r&   !HubertLayerNormConvLayer.__init__s   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 ,,t'8'8TR !?!?@rI   c                     U R                  U5      nUR                  SS5      nU R                  U5      nUR                  SS5      nU R                  U5      nU$ )N)r,   rD   ru   r<   rE   s     r@   rG    HubertLayerNormConvLayer.forward   sV    		-0%//B76%//B76rI   r<   r,   rc   ru   rd   rn   rJ   rQ   s   @r@   rp   rp   r   s    A rI   rp   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )HubertGroupNormConvLayer   c                   > [         TU ]  5         US:  a  UR                  US-
     OSU l        UR                  U   U l        [
        R                  " U R                  U R                  UR                  U   UR                  U   UR                  S9U l
        [        UR                     U l        [
        R                  " U R                  U R                  SS9U l        g )Nr   r   r_   T)
num_groupsnum_channelsaffine)r%   r&   rb   rc   rd   r'   r(   re   rf   rg   r,   r   r;   r<   	GroupNormru   rh   s      r@   r&   !HubertGroupNormConvLayer.__init__   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 !!?!?@,,$2C2CRVRcRclpqrI   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rl   )r,   ru   r<   rE   s     r@   rG    HubertGroupNormConvLayer.forward   s2    		-066rI   r{   rn   rJ   rQ   s   @r@   r}   r}      s    r  rI   r}   c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )HubertFeatureEncoder   z.Construct the features from raw audio waveformc           	        > [         TU ]  5         UR                  S:X  a@  [        USS9/[	        UR
                  S-
  5       Vs/ s H  n[        XS-   S9PM     sn-   nOVUR                  S:X  a-  [	        UR
                  5       Vs/ s H  n[        XS9PM     nnO[        SUR                   S35      e[        R                  " U5      U l        SU l        S	U l        g s  snf s  snf )
Ngroupr   )ri   r   layerz`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']FT)r%   r&   feat_extract_normr}   rangenum_feat_extract_layersr\   rp   
ValueErrorr'   
ModuleListconv_layersgradient_checkpointing_requires_grad)r=   r>   ir   r?   s       r@   r&   HubertFeatureEncoder.__init__   s    ##w.3FQGHLQRXRpRpstRtLuLLuq*6EBLuL K %%0QVW]WuWuQvwQvA3FGQvKwK01I1I0JJst  ==5&+#"L xs   C C%c                 N    U R                  5        H
  nSUl        M     SU l        g NF)
parametersrequires_gradr   r=   params     r@   _freeze_parameters'HubertFeatureEncoder._freeze_parameters   s#    __&E"'E '#rI   c                 B   US S 2S 4   nU R                   (       a  U R                  (       a  SUl        U R                   H\  nU R                   (       a@  U R                  (       a/  U R                  (       a  U R                  UR                  U5      nMT  U" U5      nM^     U$ )NT)r   trainingr   r   r   _gradient_checkpointing_func__call__)r=   input_valuesrF   
conv_layers       r@   rG   HubertFeatureEncoder.forward   s    $QW- 4==*.M'**J""t'B'Bt}} $ A A''!!
 !+= 9 + rI   )r   r   r   )
rK   rL   rM   rN   __doc__r&   r   rG   rO   rP   rQ   s   @r@   r   r      s    8#"$
 rI   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeatureProjection   c                 x  > [         TU ]  5         UR                  U l        U R                  (       a1  [        R                  " UR
                  S   UR                  S9U l        [        R                  " UR
                  S   UR                  5      U l
        [        R                  " UR                  5      U l        g )Nry   eps)r%   r&   feat_proj_layer_normr'   rt   rb   layer_norm_epsru   Linearr)   
projectionDropoutfeat_proj_dropoutdropoutr=   r>   r?   s     r@   r&    HubertFeatureProjection.__init__   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rI   c                     U R                   (       a  U R                  U5      nU R                  U5      nU R                  U5      nU$ rl   )r   ru   r   r   rE   s     r@   rG   HubertFeatureProjection.forward   s;    $$ OOM:M6]3rI   )r   r   ru   r   rJ   rQ   s   @r@   r   r      s    < rI   r   c                     ^  \ rS rSrSr     SS\S\S\S\S\S\S	\\	   4U 4S
 jjjr
S\R                  S\S\4S jr     SS\R                  S\\R                     S\\\R                        S\\R                     S\\R                     S\S\\R                  \\R                     \\\R                        4   4S jjrSrU =r$ )HubertAttention   z=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsr   
is_decoderra   	is_causalr>   c                   > [         TU ]  5         Xl        X l        X0l        X-  U l        Xpl        U R
                  U-  U R                  :w  a  [        SU R                   SU S35      eU R
                  S-  U l        X@l	        X`l
        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      )ra   )r%   r&   r   r   r   head_dimr>   r   scalingr   r   r'   r   k_projv_projq_projout_proj)	r=   r   r   r   r   ra   r   r>   r?   s	           r@   r&   HubertAttention.__init__   s     	""!.MMI%$..8MdnnM]$YKr3  }}d*$"ii	4@ii	4@ii	4@		)TBrI   tensorseq_lenbszc                     UR                  X2U R                  U R                  5      R                  SS5      R	                  5       $ rC   )viewr   r   rD   
contiguousr=   r   r   r   s       r@   _shapeHubertAttention._shape  s5    {{3GQQRSUVWbbddrI   rF   key_value_statespast_key_valueattention_masklayer_head_maskoutput_attentionsreturnc                 	   USLnUR                  5       u  pn
U R                  U5      U R                  -  nU(       a2  Ub/  US   R                  S   UR                  S   :X  a  US   nUS   nGOU(       aE  U R	                  U R                  U5      SU5      nU R	                  U R                  U5      SU5      nOUby  U R	                  U R                  U5      SU5      nU R	                  U R                  U5      SU5      n[        R                  " US   U/SS9n[        R                  " US   U/SS9nODU R	                  U R                  U5      SU5      nU R	                  U R                  U5      SU5      nU R                  (       a  X4nXR                  -  SU R                  4nU R	                  XU5      R                  " U6 nUR                  " U6 nUR                  " U6 nUR                  S5      n[        R                  " XR                  SS5      5      nUR                  5       XR                  -  X4:w  a-  [!        SXR                  -  X4 SUR                  5        35      eUbv  UR                  5       USX4:w  a"  [!        S	USX4 SUR                  5        35      eUR                  XR                  X5      U-   nUR                  XR                  -  X5      n["        R$                  R'                  USS9nUb  UR                  5       U R                  4:w  a*  [!        S
U R                  4 SUR                  5        35      eUR                  SSSS5      UR                  XR                  X5      -  nUR                  XR                  -  X5      nU(       a;  UR                  XR                  X5      nUR                  XR                  -  X5      nOSn["        R$                  R)                  UU R(                  U R*                  S9n[        R                  " UU5      nUR                  5       XR                  -  XR                  4:w  a7  [!        SXR                  -  XR                  4 SUR                  5        35      eUR                  XR                  XR                  5      nUR                  SS5      nUR                  XU R,                  5      nU R/                  U5      nUUU4$ )#Input shape: Batch x Time x ChannelNr   r   r   ry   r#   z$Attention weights should be of size 	, but is z!Attention mask should be of size z/Head mask for a single layer should be of size )pr    `attn_output` should be of size )sizer   r   shaper   r   r   torchcatr   r   r   r   reshapebmmrD   r   r'   
functionalsoftmaxr   r   r   r   )r=   rF   r   r   r   r   r   is_cross_attentionr   tgt_len_query_states
key_statesvalue_states
proj_shapesrc_lenattn_weightsattn_weights_reshaped
attn_probsattn_outputs                       r@   rG   HubertAttention.forward  s    .T9',,.a {{=1DLL@ *q!''*.>.D.DQ.GG (*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BJJ 99nQ&7%FANL T[[%?SIJ;;t{{='A2sKL?? )7NNN*B>
{{<#>CCZP''4
#++Z8//!$yy/C/CAq/IJ3#7"JJ6nn8Lg7_6` a %%'(* 
 %""$a(BB 7a8R7SS\]k]p]p]r\st  (,,S..'SVddL',,S>>-A7TL}},,\r,B&##%$..):: Et~~FWEX Y',,./1  +//2q!<|?P?PQTVdVdfm?wwL',,S>>-A7TL
 %1$5$5c>>7$\!055cNN6JG]L$(!]]**<4<<RVR_R_*`
ii
L9#"6!OO2C..4H'S`S`3a2b c$$&') 
 "&&sNNG]]S!++Aq1 "))#GmmK01>AArI   )r>   r   r   r   r   r   r   r   r   r   r   r   )        FTFNNNNNF)rK   rL   rM   rN   r   intfloatboolr   r   r&   r   Tensorr   r   rG   rO   rP   rQ   s   @r@   r   r      sZ   G  )-CC C 	C
 C C C &C C>eU\\ eC ec e 488<1526"'vB||vB #5<<0vB !u||!45	vB
 !.vB "%,,/vB  vB 
u||Xell3XeELL>Q5RR	SvB vBrI   r   c                   f  ^  \ rS rSrSrU 4S jrS\R                  S\S\4S jr	     SS\R                  S	\
\R                     S
\
\\R                        S\
\R                     S\
\R                     S\S\\R                  \
\R                     \
\\R                        4   4S jjrSrU =r$ )HubertFlashAttention2i  a8  
Hubert flash attention module. This module inherits from `HubertAttention` as the weights of the module stays
untouched. The only required change would be on the forward pass where it needs to correctly call the public API of
flash attention and deal with padding tokens in case the input contains any of them.
c                 D   > [         TU ]  " U0 UD6  [        5       U l        g rl   )r%   r&   r   _flash_attn_uses_top_left_mask)r=   argskwargsr?   s      r@   r&   HubertFlashAttention2.__init__  s#    $)&)
 /P.Q+rI   r   r   r   c                 P    UR                  X2U R                  U R                  5      $ rl   )r   r   r   r   s       r@   _reshapeHubertFlashAttention2._reshape  s    {{3GGrI   rF   r   r   r   r   r   r   c                    US LnUR                  5       u  pn
U R                  U R                  U5      SU5      nU(       aR  UbO  US   R                  S   UR                  S   :X  a,  US   R	                  SS5      nUS   R	                  SS5      nGO,U(       aE  U R                  U R                  U5      SU5      nU R                  U R                  U5      SU5      nOUb  U R                  U R                  U5      SU5      nU R                  U R                  U5      SU5      n[        R                  " US   R	                  SS5      U/SS9n[        R                  " US   R	                  SS5      U/SS9nODU R                  U R                  U5      SU5      nU R                  U R                  U5      SU5      nU R                  (       a$  UR	                  SS5      UR	                  SS5      4nUR                  S   nUb  XS   R                  S   -  nUR                  nU[        R                  :X  a  [        R                  " 5       (       a  [        R                  " 5       nOR[        U R                  S5      (       a  U R                  R                   nO U R                  R"                  R                  n[$        R'                  SU S	35        UR)                  U5      nUR)                  U5      nUR)                  U5      n[+        UUUUU	U R,                  (       a  U R.                  OS
U R0                  U R2                  S9nUR5                  XS5      nU R7                  U5      nU(       d  S nUWU4$ )Nry   r   r   r   r   rx   _pre_quantization_dtypezThe input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in .r   )r   r   use_top_left_mask)r   r   r   r   rD   r   r   r   r   r   dtypefloat32is_autocast_enabledget_autocast_gpu_dtyper1   r>   r  r!   loggerwarning_oncetor   r   r   r   r   r   r   )r=   rF   r   r   r   r   r   r   r   q_lenr   r   r   r   
kv_seq_leninput_dtypetarget_dtyper   r   s                      r@   rG   HubertFlashAttention2.forward  s    .T9%**,A }}T[[%?SI *q!''*.>.D.DQ.GG (*44Q:J)!,66q!<Lt{{3C'Db#NJ==5E)FCPL't{{='A2sKJ==])CRMLN1$5$?$?1$Ez#RXYZJ 99nQ&7&A&A!Q&G%V\]^L t{{='A2sKJ==])CRML?? )221a8,:P:PQRTU:VWN%%b)
%+11"55J #((%--'((**$;;=&?@@#{{BB#{{1177 >$ (??<8L#|4J'??<8L.$(MMDLLsnn"AA	
 "))#b9mmK0 LL.88rI   )r   r   )rK   rL   rM   rN   r   r&   r   r   r   r   r   r   r   rG   rO   rP   rQ   s   @r@   r   r     s    RHu|| Hc H H 488<1526"'e9||e9 #5<<0e9 !u||!45	e9
 !.e9 "%,,/e9  e9 
u||Xell3XeELL>Q5RR	Se9 e9rI   r   c                   2  ^  \ rS rSr     SS\R
                  S\\R
                     S\\\R
                        S\\R
                     S\\R
                     S\S\\R
                  \\R
                     \\\R
                        4   4U 4S	 jjjr	S
r
U =r$ )HubertSdpaAttentioni  rF   r   r   r   r   r   r   c           	        > U(       a&  [         R                  S5        [        TU ]  UUUUUS9$ USLnUR	                  5       u  pn
U R                  U5      nU(       a2  Ub/  US   R                  S   UR                  S   :X  a  US   nUS   nGOU(       aE  U R                  U R                  U5      SU5      nU R                  U R                  U5      SU5      nOUby  U R                  U R                  U5      SU5      nU R                  U R                  U5      SU5      n[        R                  " US   U/SS9n[        R                  " US   U/SS9nODU R                  U R                  U5      SU5      nU R                  U R                  U5      SU5      nU R                  (       a  X4nU R                  XU5      nU R                  (       a  Uc  U	S:  a  S	OS
n[        R                  R                  R!                  UUUUU R"                  (       a  U R$                  OSUS9nUR	                  5       XR&                  XR(                  4:w  a5  [+        SXR&                  XR(                  4 SUR	                  5        35      eUR-                  SS5      nUR/                  XU R0                  5      nU R3                  U5      nUSU4$ )r   a  HubertModel is using HubertSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` . Falling back to the manual attention implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.)r   r   r   r   Nr   r   r   ry   r   TFr   )	attn_mask	dropout_pr   r   r   )r	  r
  r%   rG   r   r   r   r   r   r   r   r   r   r   r'   r   scaled_dot_product_attentionr   r   r   r   r   rD   r   r   r   )r=   rF   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r?   s                   r@   rG   HubertSdpaAttention.forward  s    l 7?!1--"3 #   .T9',,.a {{=1 *q!''*.>.D.DQ.GG (*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BJJ 99nQ&7%FANL T[[%?SIJ;;t{{='A2sKL?? )7N{{<#>
 !NN~/E'TU+D[`	 hh))FF$&*mmdll G 
 #~~w!NN2CR_R_3`2a b$$&') 
 "++Aq1 "))#GmmK0D.00rI    r   )rK   rL   rM   rN   r   r   r   r   r   rG   rO   rP   rQ   s   @r@   r  r    s     488<1526"'e1||e1 #5<<0e1 !u||!45	e1
 !.e1 "%,,/e1  e1 
u||Xell3XeELL>Q5RR	Se1 e1rI   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeedForwardid  c                   > [         TU ]  5         [        R                  " UR                  5      U l        [        R                  " UR                  UR                  5      U l	        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                   5      U l        g rl   )r%   r&   r'   r   activation_dropoutintermediate_dropoutr   r)   intermediate_sizeintermediate_dense
isinstance
hidden_actstrr   intermediate_act_fnoutput_densehidden_dropoutoutput_dropoutr   s     r@   r&   HubertFeedForward.__init__e  s    $&JJv/H/H$I!"$))F,>,>@X@X"Yf''--'-f.?.?'@D$'-'8'8D$IIf&>&>@R@RS jj)>)>?rI   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU$ rl   )r  r#  r  r$  r&  rE   s     r@   rG   HubertFeedForward.forwardr  sX    //>00?11-@))-8++M:rI   )r#  r  r  r$  r&  rJ   rQ   s   @r@   r  r  d  s    @ rI   r  )eagersdpaflash_attention_2c                   2   ^  \ rS rSrU 4S jrSS jrSrU =r$ )HubertEncoderLayeri  c                   > [         TU ]  5         [        UR                     " UR                  UR
                  UR                  SS9U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        g )NFr   r   r   r   r   )r%   r&   HUBERT_ATTENTION_CLASSES_attn_implementationr)   num_attention_headsattention_dropout	attentionr'   r   r%  r   rt   r   ru   r  feed_forwardfinal_layer_normr   s     r@   r&   HubertEncoderLayer.__init__  s    1&2M2MN((00,,	
 zz&"7"78,,v'9'9v?T?TU-f5 "V-?-?VEZEZ [rI   c                     UnU R                  XUS9u  pnU R                  U5      nXA-   nU R                  U5      nXR                  U5      -   nU R	                  U5      nU4nU(       a  Xu4-  nU$ Nr   r   )r5  r   ru   r6  r7  r=   rF   r   r   attn_residualr   r   outputss           r@   rG   HubertEncoderLayer.forward  s    %)-L] *8 *
&Q ]3%56%(9(9-(HH--m< "&GrI   )r5  r   r6  r7  ru   r   rJ   rQ   s   @r@   r.  r.    s    \ rI   r.  c                      ^  \ rS rSrU 4S jr    S
S\R                  S\\R                     S\	S\	S\	4
S jjr
S	rU =r$ )HubertEncoderi  c                   > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        SU l        UR&                  S:H  U l        g s  snf Nr   Fr,  )r%   r&   r>   r   pos_conv_embedr'   rt   r)   r   ru   r   r%  r   r   r   num_hidden_layersr.  layersr   r2  _use_flash_attention_2r=   r>   r   r?   s      r@   r&   HubertEncoder.__init__  s    ;FC,,v'9'9v?T?TUzz&"7"78mmvOgOgIh$iIhA%7%?Ih$ij&+#&,&A&AEX&X# %j    CrF   r   r   output_hidden_statesreturn_dictc                    U(       a  SOS nU(       a  SOS nUb  UR                  S5      R                  SSUR                  S   5      nSX) '   U R                  (       a  Ub  SU;   a  UOS nOSUS S 2S S S S 24   R	                  UR
                  S9-
  nU[        R                  " UR
                  5      R                  -  nUR                  UR                  S   SUR                  S   UR                  S   5      nU R                  U5      n	X-   nU R                  U5      nU R                  U5      n[        5       =(       d    [        U 5      n
U R                   H  nU(       a  Xa4-   n[        R                   " / 5      nU R"                  (       a  XR$                  R&                  :  a  SOS	nU(       a  U
(       aM  U R(                  (       a0  U R"                  (       a  U R+                  UR,                  UUU5      nOU" XUS
9nUS   nU(       a  SnU(       d  M  UWS   4-   nM     U(       a  Xa4-   nU(       d  [/        S XU4 5       5      $ [1        UUUS9$ )Nr  ry   r   r   r         ?r  TFr;  NNc              3   .   #    U  H  oc  M  Uv   M     g 7frl   r  .0vs     r@   	<genexpr>(HubertEncoder.forward.<locals>.<genexpr>       m$[q$[   	last_hidden_staterF   
attentions)	unsqueezerepeatr   rG  r  r  r   finfominexpandrD  ru   r   r	   r
   rF  randr   r>   	layerdropr   r   r   tupler   r=   rF   r   r   rK  rL  all_hidden_statesall_self_attentionsexpand_attention_maskposition_embeddingssynced_gpusr   dropout_probabilityskip_the_layerlayer_outputss                  r@   rG   HubertEncoder.forward  sF    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M01**4B4NSTXfSfmq "%~atQ6F'G'J'JQ^QdQd'J'e!e!/%++m>Q>Q2R2V2V!V!/!6!6"((+Q0D0DR0H.J^J^_aJb" #11-@%;6]302R6LT6R[[E#$58H$H! #(**R.%)]]8KkkNcNc8cTjoN![..4==$($E$E%&)	%M %*%Xi%M !.a 0 ,  &9]1=M<O&O#7 !:   14D Dm]GZ$[mmm++*
 	
rI   rG  r>   r   r   ru   rF  rD  NFFT)rK   rL   rM   rN   r&   r   r   r   r   r   rG   rO   rP   rQ   s   @r@   rA  rA    se    Y 26"'%* G
||G
 !.G
  	G

 #G
 G
 G
rI   rA  c                   J   ^  \ rS rSrU 4S jrS\R                  4S jrSrU =r	$ )HubertAttnAdapterLayeri  c                   > [         TU ]  5         UR                  U l        UR                  U l        [        R                  " U R
                  5      U l        [        R                  " U R
                  U R                  5      U l
        [        R                  " 5       U l        [        R                  " U R                  U R
                  5      U l        g)z
Implements adapter modules directly with 3D tensor weight as parameters and without using ModuleList to speed
up training throughput.
N)r%   r&   adapter_attn_dim	input_dimr)   
hidden_dimr'   rt   normr   linear_1ReLUact_fnlinear_2r   s     r@   r&   HubertAttnAdapterLayer.__init__  s    
 	00 ,,LL1			$//4>>Bggi		$..$//BrI   rF   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ rl   )rv  rw  ry  rz  rE   s     r@   rG   HubertAttnAdapterLayer.forward
  s@    		-0m4M2m4rI   )ry  ru  rt  rw  rz  rv  )
rK   rL   rM   rN   r&   r   FloatTensorrG   rO   rP   rQ   s   @r@   rq  rq    s     CU%6%6  rI   rq  c                   t   ^  \ rS rSrU 4S jr  SS\R                  S\\R                     S\4S jjr	Sr
U =r$ )	!HubertEncoderLayerStableLayerNormi  c                 
  > [         TU ]  5         [        UR                     " UR                  UR
                  UR                  SS9U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        [%        USS 5      b  ['        U5      U l        g S U l        g )NFr0  r   rs  )r%   r&   r1  r2  r)   r3  r4  r5  r'   r   r%  r   rt   r   ru   r  r6  r7  getattrrq  adapter_layerr   s     r@   r&   *HubertEncoderLayerStableLayerNorm.__init__  s    1&2M2MN((00,,	
 zz&"7"78,,v'9'9v?T?TU-f5 "V-?-?VEZEZ [6-t4@!7!?D!%DrI   rF   r   r   c                    UnU R                  U5      nU R                  XUS9u  pnU R                  U5      nXA-   nXR                  U R	                  U5      5      -   nU R
                  b  XR                  U5      -   nU4nU(       a  Xu4-  nU$ r:  )ru   r5  r   r6  r7  r  r<  s           r@   rG   )HubertEncoderLayerStableLayerNorm.forward'  s     &6)-L] *8 *
&Q ]3%5%(9(9$:O:OP]:^(__)),>,>},MMM "&GrI   )r  r5  r   r6  r7  ru   r   )rK   rL   rM   rN   r&   r   r   r   r   rG   rO   rP   rQ   s   @r@   r  r    sC    &* 26"'	|| !.  	 rI   r  c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )HubertEncoderStableLayerNormiA  c                   > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        SU l        UR&                  S:H  U l        g s  snf rC  )r%   r&   r>   r   rD  r'   rt   r)   r   ru   r   r%  r   r   r   rE  r  rF  r   r2  rG  rH  s      r@   r&   %HubertEncoderStableLayerNorm.__init__B  s    ;FC,,v'9'9v?T?TUzz&"7"78mm@EfF^F^@_`@_1.v6@_`
 ',#&,&A&AEX&X# arJ  c                    U(       a  SOS nU(       a  SOS nUb  UR                  S5      R                  SSUR                  S   5      nXR                  UR                  S9-  nU R
                  (       a  Ub  SU;   a  UOS nOSUS S 2S S S S 24   R                  UR                  S9-
  nU[        R                  " UR                  5      R                  -  nUR                  UR                  S   SUR                  S   UR                  S   5      nU R                  U5      n	X-   nU R                  U5      n[        5       =(       d    [        U 5      n
U R                   H  nU(       a  Xa4-   n[        R                  " / 5      nU R                   (       a  XR"                  R$                  :  a  SOS	nU(       a  U
(       aM  U R&                  (       a0  U R                   (       a  U R)                  UR*                  UUU5      nOU" XUS
9nUS   nU(       a  SnU(       d  M  UWS   4-   nM     U R-                  U5      nU(       a  Xa4-   nU(       d  [/        S XU4 5       5      $ [1        UUUS9$ )Nr  ry   r   r   rO  r   rN  TFr;  rP  c              3   .   #    U  H  oc  M  Uv   M     g 7frl   r  rR  s     r@   rU  7HubertEncoderStableLayerNorm.forward.<locals>.<genexpr>  rW  rX  rY  )r\  r]  r   r  r  rG  r   r^  r_  r`  rD  r   r	   r
   rF  ra  r   r>   rb  r   r   r   ru   rc  r   rd  s                  r@   rG   $HubertEncoderStableLayerNorm.forwardN  sY    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!),D,D=K^K^,D,__M**4B4NSTXfSfmq "%~atQ6F'G'J'JQ^QdQd'J'e!e!/%++m>Q>Q2R2V2V!V!/!6!6"((+Q0D0DR0H.J^J^_aJb" #11-@%;]302R6LT6R[[E#$58H$H! #(**R.%)]]8KkkNcNc8cTjoN![ ..4==$($E$E%&)	%M %*%Xi%M !.a 0 ,  &9]1=M<O&O#9 !< 6 14D Dm]GZ$[mmm++*
 	
rI   rn  ro  rJ   rQ   s   @r@   r  r  A  s#    
Y "I
 I
rI   r  c                       \ rS rSr\rSrSrSrSr	Sr
S rS\\R                  \4   4S jrS\S	\R                  4S
 jrSrg)HubertPreTrainedModeli  hubertr   Tc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  [        R                  [        R                  45      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        R                  5      (       Gai  [        5       (       a  SSKn[#        US5      (       a~  [#        US5      (       am  UR$                  R'                  UR(                  UR*                  /SS9   [        R,                  R/                  UR                  R                  5        SSS5        OUR$                  R'                  UR                  SS9   [        R,                  R/                  UR                  R                  5        SSS5        O3[        R,                  R/                  UR                  R                  5        UR                  b%  UR                  R                  R                  5         gg[        U[0        5      (       a7  [#        US	5      (       a%  UR2                  R                  R5                  5         gg[        U[6        5      (       aR  [#        US
5      (       a@  UR8                  R                  R                  SU R                  R:                  S-   -  5        ggg! , (       d  f       N= f! , (       d  f       GN= f)zInitialize the weightsr   )meanstdNrN  r   r8   r7   r   masked_spec_embedlayer_weightsr   )r   r'   r   r!   datanormal_r>   initializer_rangera   zero_rt   r   r/   fill_r(   r	   r2   r1   r3   r4   r8   r7   initkaiming_normal_HubertModelr  uniform_HubertForSequenceClassificationr  rE  )r=   moduler2   s      r@   _init_weights#HubertPreTrainedModel._init_weights  sV   fbii(( MM&&CT[[5R5R&S{{&  &&( 'r||R^^ LMMKK""$MM$$S)		**)++ 6:..76:3N3N"::FOOV__;]mn:o//0B0BC po #::6==XY:Z//0B0BC [Z ''(:(:;{{&  &&( ',,v233((--668 4 ?@@v//$$))//t{{7T7TWX7X0YZ 0 A po [Zs   4M94M!
M!
M0input_lengthsc                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )r   div)input_lengthr   r`   s      r@   _conv_out_lengthPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length  s      99\7wWZ[[[rI   )zipr>   re   rf   )r=   r  r  r   r`   s        r@    _get_feat_extract_output_lengths6HubertPreTrainedModel._get_feat_extract_output_lengths  sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y rI   feature_vector_lengthr   c                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nry   r   )r  devicer   )r  )r  sumr  r   longr   zerosr  r  arangeflipcumsumr   )r=   r  r   output_lengths
batch_sizes        r@   "_get_feature_vector_attention_mask8HubertPreTrainedModel._get_feature_vector_attention_mask  s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrI   r  N)rK   rL   rM   rN   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_2_supports_sdpar  r   r   
LongTensorr   r  r  rO   r  rI   r@   r  r    sa    L $O&*#!N[BeEDTDTVYDY>Z 
 
]b]m]m 
rI   r  r   	mask_probmask_lengthr   	min_masksr   c           	        ^^^^^ U u  nmTS:  a  [        S5      eTT:  a  [        ST ST S35      e[        R                  R                  S5      R	                  5       mUUUUU4S jnUb-  UR                  5       R                  S5      R                  5       O[        U5       Vs/ s H  nTPM     snn[        R                  " UT4[        S	9n	/ n
U" T5      nUS
:X  a  U	$ U H  nU" U5      n[        R                  R                  [        R                  " UTS-
  -
  5      USS9n[        U5      S
:X  a  TS-
  nOUS
   n[        R                  " U[        R                  " X-
  [        R                   S	9U-  /5      nU
R#                  U5        M     [        R$                  " U
5      n
[        R&                  " U
SS2SS2S4   X[T45      n
U
R)                  X[T-  5      n
[        R                  " T5      SSSS24   n[        R&                  " UX[T45      R)                  X[T-  5      nU
U-   n
U
R+                  5       TS-
  :  a  TS-
  XTS-
  :  '   [        R,                  " XSS5        U	$ s  snf )a*  
Computes random mask spans for a given shape. Used to implement [SpecAugment: A Simple Data Augmentation Method for
ASR](https://arxiv.org/abs/1904.08779). Note that this method is not optimized to run on TPU and should be run on
CPU as part of the preprocessing during training.

Args:
    shape: The shape for which to compute masks. This should be of a tuple of size 2 where
           the first element is the batch size and the second element is the length of the axis to span.
    mask_prob:  The percentage of the whole axis (between 0 and 1) which will be masked. The number of
                independently generated mask spans of length `mask_length` is computed by
                `mask_prob*shape[1]/mask_length`. Note that due to overlaps, `mask_prob` is an upper bound and the
                actual percentage will be smaller.
    mask_length: size of the mask
    min_masks: minimum number of masked spans
    attention_mask: A (right-padded) attention mask which independently shortens the feature axis of
                    each batch dimension.
r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `c                    > [        TU -  T-  T-   5      n[        UT5      nUT-  T:  a  TT-  nU TS-
  -
  U:  a  [        U TS-
  -
  S5      nU$ )z;Given input length, compute how many spans should be maskedr   r   )r   max)r  num_masked_spanepsilonr  r  r  sequence_lengths     r@   compute_num_masked_span6_compute_mask_indices.<locals>.compute_num_masked_span  so    i,6DwNOoy9 [(?:-<O ;?+o=!,+/"BAFOrI   Nry   rO  r   F)replace)r   nprandomra  itemdetachr  tolistr   r  r   choicer  lenconcatenateonesint32appendarraybroadcast_tor   r  put_along_axis)r   r  r  r   r  r  r  r   r  spec_aug_maskspec_aug_mask_idxsmax_num_masked_spanr  r  spec_aug_mask_idxdummy_mask_idxoffsetsr  r  s    `` `            @@r@   _compute_mask_indicesr    s   0 #(JQABB_$]^i]j&&7q:
 	
 iinnQ$$&G $ % 	##B'..0',Z'89'8!o'89  HHj/:$GM1/Ba%1,? II,,IIlkAo67RW - 
  !Q& -q0N.q1NNN(;(MUWU]U] ^ao op
 	!!"34/ &2 "45 1a:&+(V ,33JVa@ab ii$T4]3Goog
'UV^^+5G ,g5 /A"55GVYZGZ!0CCD mB?w :s   (I0c                   >  ^  \ rS rSrS\4U 4S jjr  SS\R                  S\\R                     S\\R                     4S jjr
\     SS\\R                     S\\R                     S\\R                     S	\\   S
\\   S\\   S\\\4   4S jj5       rSrU =r$ )r  iW  r>   c                   > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        UR                  S:  d  UR                  S:  aG  [        R                  " [        R                  " UR                  5      R                  5       5      U l        UR                   (       a  [#        U5      U l        O['        U5      U l        U R)                  5         g )Nr   )r%   r&   r>   r   feature_extractorr   feature_projectionmask_time_probmask_feature_probr'   	Parameterr   r   r)   r  r  do_stable_layer_normr  encoderrA  	post_initr   s     r@   r&   HubertModel.__init__Y  s     !5f!="9&"A   3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	rI   rF   mask_time_indicesr   c                    [        U R                  SS5      (       d  U$ UR                  5       u  pEnUb(  U R                  R	                  UR
                  5      X'   OU R                  R                  S:  a  U R                  (       a  [        XE4U R                  R                  U R                  R                  UU R                  R                  S9n[        R                  " X!R                  [        R                  S9nU R                  R	                  UR
                  5      X'   U R                  R                  S:  a  U R                  (       a  [        XF4U R                  R                  U R                  R                   U R                  R"                  S9n[        R                  " XqR                  [        R                  S9nUSS2S4   R%                  SUS5      nSX'   U$ )	z
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://arxiv.org/abs/1904.08779).
apply_spec_augmentTNr   )r  r  r   r  )r  r  )r  r  r  ry   )r  r>   r   r  r  r  r  r   r  mask_time_lengthmask_time_min_masksr   r   r  r   r  mask_feature_lengthmask_feature_min_masksr`  )r=   rF   r  r   r  r  r)   mask_feature_indicess           r@   _mask_hidden_statesHubertModel._mask_hidden_statesk  s    t{{$8$??   4A3E3E3G0
[(/3/E/E/H/HI\I\/]M,[[''!+ 5-++44 KK88-++99! !&->G[G[chcmcm n/3/E/E/H/HI\I\/]M,;;((1,#8)++77 KK;;++<<	$  $)<<0DMaMainisis#t #74#@#G#GO]_#` 23M/rI   r   r   rK  rL  r   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nUb  U R                  UR                  S   U5      nU R                  U5      nU R                  XS9nU R                  UUUUUS9n	U	S   nU(       d	  U4U	SS -   $ [        UU	R                  U	R                  S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.

Example:

```python
>>> from transformers import AutoProcessor, HubertModel
>>> from datasets import load_dataset
>>> import soundfile as sf

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(batch):
...     speech, _ = sf.read(batch["file"])
...     batch["speech"] = speech
...     return batch


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```Nr   r   )r  r   r   rK  rL  r   rY  )r>   r   rK  use_return_dictr  rD   r  r   r  r  r  r   rF   r[  )
r=   r   r   r  r   rK  rL  extract_featuresrF   encoder_outputss
             r@   rG   HubertModel.forward  s)   L 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;%!DDEUE[E[\]E^`noN//0@A000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rI   )r>   r  r  r  r  rP  NNNNN)rK   rL   rM   rN   r   r&   r   r~  r   r  r  r   r   r   r   r   r   rG   rO   rP   rQ   s   @r@   r  r  W  s    | * :>59	,((, $E$5$56, !!1!12	,\  269=,0/3&*F
u||,F
 !.F
 $E$5$56	F

 $D>F
 'tnF
 d^F
 
uo%	&F
 F
rI   r  zn
    Hubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
    )custom_introc                      ^  \ rS rSrSS\\   4U 4S jjjrS rS rS r	S r
\     SS\\R                     S	\\R                     S
\\   S\\   S\\   S\\R                     S\\\4   4S jj5       rSrU =r$ )HubertForCTCi  target_langc                   > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        X l        UR                  c  [        SU R                   S35      e[        US5      (       a  UR                  (       a  UR                  OUR                  n[        R                   " X1R                  5      U l        U R%                  5         g)a  
target_lang (`str`, *optional*):
    Language id of adapter weights. Adapter weights are stored in the format adapter.<lang>.safetensors or
    adapter.<lang>.bin. Only relevant when using an instance of [`HubertForCTC`] with adapters. Uses 'eng' by
    default.
NzYou are trying to instantiate z with a configuration that does not define the vocabulary size of the language model head. Please instantiate the model as follows: `HubertForCTC.from_pretrained(..., vocab_size=vocab_size)`. or define `vocab_size` of your model's configuration.add_adapter)r%   r&   r  r  r'   r   final_dropoutr   r  
vocab_sizer   r?   r1   r  output_hidden_sizer)   r   lm_headr  )r=   r>   r  r
  r?   s       r@   r&   HubertForCTC.__init__  s     	 !&)zz&"6"67&$00@ AH H  *1)G)GFL^L^F%%djdvdv 	 yy!35F5FG 	rI   c                     U R                   nUb'  [        U R                  SS5      c  [        SU S35      eUc.  [        U R                  SS5      b  [        R                  S5        gUb  U R                  USS9  gg)a  
This method overwrites [`~PreTrainedModel.tie_weights`] so that adapter weights can be correctly loaded when
passing `target_lang=...` to `from_pretrained(...)`.

This method is **not** supposed to be called by the user and is prone to be changed in the future.
Nrs  zCannot pass `target_lang`: z- if `config.adapter_attn_dim` is not defined.z)By default `target_lang` is set to 'eng'.T)
force_load)r  r  r>   r   r	  infoload_adapter)r=   r  s     r@   tie_weightsHubertForCTC.tie_weights	  s     &&"wt{{<NPT'U']:;-Gtuvv WT[[:Ld%S%_KKCD$kd; %rI   c                 Z    [         R                  " S[        5        U R                  5         g)
Calling this function will disable the gradient computation for the feature encoder so that its parameter will
not be updated during training.
The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. Please use the equivalent `freeze_feature_encoder` method instead.NwarningswarnFutureWarningfreeze_feature_encoderr=   s    r@   freeze_feature_extractor%HubertForCTC.freeze_feature_extractor  '    
 	Q	

 	##%rI   c                 L    U R                   R                  R                  5         gr  Nr  r  r   r  s    r@   r  #HubertForCTC.freeze_feature_encoder*      
 	%%88:rI   c                 T    U R                   R                  5        H
  nSUl        M     gz
Calling this function will disable the gradient computation for the base model so that its parameters will not
be updated during training. Only the classification head will be updated.
FNr  r   r   r   s     r@   freeze_base_modelHubertForCTC.freeze_base_model1  #    
 [[++-E"'E .rI   r   r   r   rK  rL  labelsr   c                    Ub  UOU R                   R                  nUbJ  UR                  5       U R                   R                  :  a"  [	        SU R                   R                   35      eU R                  UUUUUS9nUS   nU R                  U5      nU R                  U5      n	Sn
UGbX  Ub  UO"[        R                  " U[        R                  S9nU R                  UR                  S5      5      R                  [        R                  5      nUS:  nUR                  S5      nUR                  U5      n[        R                   R#                  U	S[        R$                  S9R'                  SS5      n[        R(                  R*                  R-                  S	S
9   [        R                   R/                  UUUUU R                   R0                  U R                   R2                  U R                   R4                  S9n
SSS5        U(       d  U	4U[6        S -   nU
b  U
4U-   $ U$ [9        XUR:                  UR<                  S9$ ! , (       d  f       NL= f)a  
labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
    Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
    the sequence length of the output logits. Indices are selected in `[-100, 0, ..., config.vocab_size - 1]`.
    All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
    config.vocab_size - 1]`.
Nz$Label values must be <= vocab_size: r  r   rO  ry   )r#   r  r   F)enabled)blank	reductionzero_infinitylosslogitsrF   r[  )r>   r  r  r	  r   r  r   r  r   	ones_liker  r  r  r  masked_selectr'   r   log_softmaxr  rD   backendscudnnflagsctc_losspad_token_idctc_loss_reductionctc_zero_infinity_HIDDEN_STATES_START_POSITIONr   rF   r[  )r=   r   r   r   rK  rL  r*  r>  rF   r2  r1  r  labels_masktarget_lengthsflattened_targets	log_probsoutputs                    r@   rG   HubertForCTC.forward9  s   " &1%<k$++B]B]&**,$++2H2H"HCDKKDZDZC[\]]++)/!5#  
  
]3m, #1"<%//R^fkfpfpBq  !AA.BTBTUWBXY\\]b]g]ghM !A+K(__R0N & 4 4[ A 11&b1V``abdefI%%++E+:}}--%!"++22"kk<<"&++"?"? .  ; Y)F)G!HHF)-)9TGf$EvEG4I4IV]VhVh
 	
 ;:s   A H??
I)r   r  r  r  rl   r  )rK   rL   rM   rN   r   r"  r&   r  r  r  r'  r   r   r   r   r   r   r   rG   rO   rP   rQ   s   @r@   r  r    s    HSM  :<*
&;(  26,0/3&*)-D
u||,D
 !.D
 $D>	D

 'tnD
 d^D
 &D
 
un$	%D
 D
rI   r  z
    Hubert Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like
    SUPERB Keyword Spotting.
    c                      ^  \ rS rSrU 4S jrS rS rS r\     SS\	\
R                     S\	\
R                     S\	\   S	\	\   S
\	\   S\	\
R                     S\\\4   4S jj5       rSrU =r$ )r  i  c                 "  > [         TU ]  U5        [        US5      (       a  UR                  (       a  [	        S5      e[        U5      U l        UR                  S-   nUR                  (       a2  [        R                  " [        R                  " U5      U-  5      U l        [        R                  " UR                  UR                   5      U l        [        R                  " UR                   UR$                  5      U l        U R)                  5         g )Nr  z]Sequence classification does not support the use of Hubert adapters (config.add_adapter=True)r   )r%   r&   r1   r  r   r  r  rE  use_weighted_layer_sumr'   r  r   r  r  r   r)   classifier_proj_size	projector
num_labels
classifierr  )r=   r>   
num_layersr?   s      r@   r&   (HubertForSequenceClassification.__init__  s     6=))f.@.@o  "&)--1
((!#ejj.Dz.Q!RD6#5#5v7R7RS))F$?$?ARARS 	rI   c                 Z    [         R                  " S[        5        U R                  5         g)z
Calling this function will disable the gradient computation for the feature encoder so that its parameters will
not be updated during training.
r  Nr  r  s    r@   r  8HubertForSequenceClassification.freeze_feature_extractor  r  rI   c                 L    U R                   R                  R                  5         gr   r!  r  s    r@   r  6HubertForSequenceClassification.freeze_feature_encoder  r#  rI   c                 T    U R                   R                  5        H
  nSUl        M     gr%  r&  r   s     r@   r'  1HubertForSequenceClassification.freeze_base_model  r)  rI   r   r   r   rK  rL  r*  r   c                 0   Ub  UOU R                   R                  nU R                   R                  (       a  SOUnU R                  UUUUUS9nU R                   R                  (       ai  U[           n[
        R                  " USS9n[        R                  R                  U R                  SS9n	XR                  SSS5      -  R                  SS9nOUS   nU R                  U5      nUc  UR                  SS9n
OU R                  UR                   S   U5      nUR#                  S5      R%                  SSUR                   S   5      nS	X) '   UR                  SS9UR                  SS9R                  SS5      -  n
U R'                  U
5      nSnUbF  [)        5       nU" UR                  SU R                   R*                  5      UR                  S5      5      nU(       d  U4U[        S -   nUb  U4U-   $ U$ [-        UUUR.                  UR0                  S
9$ )ag  
input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Float values of input raw speech waveform. Values can be obtained by loading a `.flac` or `.wav` audio file
    into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via the soundfile library (`pip install
    soundfile`). To prepare the array into `input_values`, the [`AutoProcessor`] should be used for padding and
    conversion into a tensor of type `torch.FloatTensor`. See [`HubertProcessor.__call__`] for details.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NTr  r   r   ry   r   r   r   r0  )r>   r  rF  r  r=  r   stackr'   r   r   r  r   r  rH  r  r  r   r\  r]  rJ  r   rI  r   rF   r[  )r=   r   r   r   rK  rL  r*  r>  rF   norm_weightspooled_outputpadding_maskexpand_padding_maskr2  r1  loss_fctrB  s                    r@   rG   'HubertForSequenceClassification.forward  s   , &1%<k$++B]B]'+{{'I'ItOc++)/!5#  
 ;;--#$ABM!KK1=M==001C1C0LL*->->r1a-HHMMRSMTM#AJM}5!)..1.5MBB=CVCVWXCY[ijL"."8"8"<"C"CAq-J]J]^_J`"a25M./)--!-4|7G7GA7G7N7S7STVXY7ZZM/')HFKKDKK,B,BCV[[QS_UDY)F)G!HHF)-)9TGf$EvE'!//))	
 	
rI   )rJ  r  r  rH  r  )rK   rL   rM   rN   r&   r  r  r'  r   r   r   r   r   r   r   r   rG   rO   rP   rQ   s   @r@   r  r    s    "
&;(  26,0/3&*)-A
u||,A
 !.A
 $D>	A

 'tnA
 d^A
 &A
 
u..	/A
 A
rI   r  )r  r  r  r  rX   )@r  typingr   r   r   numpyr  r   torch.nnr'   r   activationsr   integrations.deepspeedr	   integrations.fsdpr
   modeling_flash_attention_utilsr   r   modeling_outputsr   r   r   modeling_utilsr   r0   r   r   configuration_hubertr   r   
get_loggerrK   r	  Moduler   r:   r\   rp   r}   r   r   r   r   r  r  r1  r.  rA  rq  r  r  r  r   r   r  ndarrayr  r  r=  r  r  __all__r  rI   r@   <module>ri     s~    ) )    % ! @ 7 h Y Y - , . J 
		H	%/BII /d  *ryy 6ryy 0)299 )Xbii $[Bbii [B|w9O w9tf1/ f1R		 2 .    FR
BII R
jRYY 2*		 *ZV
299 V
r BO B BR 26tc?tt t U--.	t
 t ZZtn H
' H
 H
V !"  
S
( S

S
l o
&; o
o
d frI   