
    fTh4,                        S SK JrJrJr  S SKrS SKJr  SSKJr  SSK	J
r
  SSKJr  SSKJr  SSKJr  S	S
KJrJrJrJrJrJrJr  SSKJr  Sr " S S\R8                  5      r " S S\5      r " S S\5      r " S S\R8                  5      r  " S S\5      r! " S S\5      r"\ " S S\5      5       r# " S S\\#5      r$ " S S\5      r% " S S \5      r&/ S!Qr'g)"    )OptionalTupleUnionN   )ACT2FN)is_deepspeed_zero3_enabled)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2EncoderWav2Vec2EncoderStableLayerNormWav2Vec2FeatureEncoderWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ModelWav2Vec2SamePadLayer   )HubertConfigc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertPositionalConvEmbedding   c                 2  > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  S9U l        S U l        UR                  (       a'  [        R                  " UR                  5      U l        GO[        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR"                  R%                  U R                  R&                  SS9   U" U R                  SSS9U l        S S S 5        [        U R                  S5      (       aU  U R                  R                  R&                  R(                  nU R                  R                  R&                  R*                  nO,U R                  R,                  nU R                  R.                  nUR"                  R1                  X5        UR"                  R1                  X5        OU" U R                  SSS9U l        [3        UR
                  5      U l        [6        UR8                     U l        g ! , (       d  f       GN,= f)	Nr   )kernel_sizepaddinggroupsweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normconv_pos_batch_normBatchNorm1dutilsr   hasattrr#   r   	deepspeedzeroGatheredParametersr    	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr   r1   r6   r7   	__class__s         a/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/hubert/modular_hubert.pyr%   &HubertPositionalConvEmbedding.__init__   s   II6622a777
	 %% nnV-?-?@DO((..Krxx00-@@ hh77CC)++ ^^66tyy7G7GWX6Y +DIIH! LDI Z499&899#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@ ZYs   
J
Jc                     UR                  SS5      nU R                  b  U R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nUR                  SS5      nU$ )Nr   r   )	transposer,   r+   r   r;   r<   hidden_statess     r?   forward%HubertPositionalConvEmbedding.forward@   sn    %//15??& OOM:M		-0]36%//15    )r;   r,   r+   r   __name__
__module____qualname____firstlineno__r%   rE   __static_attributes____classcell__r>   s   @r?   r   r      s    #AJ	 	rG   r   c                       \ rS rSrSrg)r9   L    NrI   rJ   rK   rL   rM   rR   rG   r?   r9   r9   L       rG   r9   c                       \ rS rSrSrg)HubertFeatureEncoderP   rR   NrS   rR   rG   r?   rV   rV   P   rT   rG   rV   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeatureProjectionT   c                 x  > [         TU ]  5         UR                  U l        U R                  (       a1  [        R                  " UR
                  S   UR                  S9U l        [        R                  " UR
                  S   UR                  5      U l
        [        R                  " UR                  5      U l        g )N)eps)r$   r%   feat_proj_layer_normr&   	LayerNormconv_dimlayer_norm_eps
layer_normLinearr(   
projectionDropoutfeat_proj_dropoutdropoutr<   r=   r>   s     r?   r%    HubertFeatureProjection.__init__U   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rG   c                     U R                   (       a  U R                  U5      nU R                  U5      nU R                  U5      nU$ )N)r^   rb   rd   rg   rC   s     r?   rE   HubertFeatureProjection.forward]   s;    $$ OOM:M6]3rG   )rg   r^   rb   rd   rH   rO   s   @r?   rY   rY   T   s    < rG   rY   c                       \ rS rSrSrg)HubertEncoderf   rR   NrS   rR   rG   r?   rm   rm   f   rT   rG   rm   c                       \ rS rSrSrg)HubertEncoderStableLayerNormj   rR   NrS   rR   rG   r?   rp   rp   j   rT   rG   rp   c                       \ rS rSr\rSrSrSrSr	Sr
S rS\\R                  \4   4S jrS\S	\R                  4S
 jrSrg)HubertPreTrainedModeln   hubertinput_valuesTc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  [        R                  [        R                  45      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        R                  5      (       Gai  [        5       (       a  SSKn[#        US5      (       a~  [#        US5      (       am  UR$                  R'                  UR(                  UR*                  /SS9   [        R,                  R/                  UR                  R                  5        SSS5        OUR$                  R'                  UR                  SS9   [        R,                  R/                  UR                  R                  5        SSS5        O3[        R,                  R/                  UR                  R                  5        UR                  b%  UR                  R                  R                  5         gg[        U[0        5      (       a7  [#        US	5      (       a%  UR2                  R                  R5                  5         gg[        U[6        5      (       aR  [#        US
5      (       a@  UR8                  R                  R                  SU R                  R:                  S-   -  5        ggg! , (       d  f       N= f! , (       d  f       GN= f)zInitialize the weights        )meanstdNg      ?r   r7   r6   r   masked_spec_embedlayer_weightsr   )
isinstancer&   rc   r    datanormal_r=   initializer_rangebiaszero_r_   	GroupNormr.   fill_r'   r   r1   r0   r2   r3   r7   r6   initkaiming_normal_HubertModelr{   uniform_HubertForSequenceClassificationr|   num_hidden_layers)r<   moduler1   s      r?   _init_weights#HubertPreTrainedModel._init_weightsw   sV   fbii(( MM&&CT[[5R5R&S{{&  &&( 'r||R^^ LMMKK""$MM$$S)		**)++ 6:..76:3N3N"::FOOV__;]mn:o//0B0BC po #::6==XY:Z//0B0BC [Z ''(:(:;{{&  &&( ',,v233((--668 4 ?@@v//$$))//t{{7T7TWX7X0YZ 0 A po [Zs   4M94M!
M!
M0input_lengthsc                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )torchdiv)input_lengthr   strides      r?   _conv_out_lengthPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length   s      99\7wWZ[[[rG   )zipr=   conv_kernelconv_stride)r<   r   r   r   r   s        r?    _get_feat_extract_output_lengths6HubertPreTrainedModel._get_feat_extract_output_lengths   sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y rG   feature_vector_lengthattention_maskc                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr\   r   )dtypedevicer   )r   )r   sumtor   longshapezerosr   r   arangeflipcumsumbool)r<   r   r   output_lengths
batch_sizes        r?   "_get_feature_vector_attention_mask8HubertPreTrainedModel._get_feature_vector_attention_mask   s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrG   rR   N)rI   rJ   rK   rL   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_2_supports_sdpar   r   r   
LongTensorintr   r   rM   rR   rG   r?   rs   rs   n   sa    L $O&*#!N[BeEDTDTVYDY>Z 
 
]b]m]m 
rG   rs   c                      ^  \ rS rSrS\4U 4S jjrS rS r     SS\\	R                     S\\	R                     S\\	R                     S	\\   S
\\   S\\   S\\\4   4S jjrSrU =r$ )r      r=   c                   > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        UR                  S:  d  UR                  S:  aG  [        R                  " [        R                  " UR                  5      R                  5       5      U l        UR                   (       a  [#        U5      U l        O['        U5      U l        U R)                  5         U ?g )Nrx   )r$   r%   r=   rV   feature_extractorrY   feature_projectionmask_time_probmask_feature_probr&   	Parameterr   Tensorr(   r   r{   do_stable_layer_normrp   encoderrm   	post_initadapterrh   s     r?   r%   HubertModel.__init__   s     !5f!="9&"A  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	LrG   c                     [        S5      eNzNot needed for HubertAttributeErrorr<   s    r?   freeze_feature_extractor$HubertModel.freeze_feature_extractor       455rG   c                     [        S5      er   r   r   s    r?   freeze_feature_encoder"HubertModel.freeze_feature_encoder   r   rG   rv   r   mask_time_indicesoutput_attentionsoutput_hidden_statesreturn_dictreturnc                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nUb  U R                  UR                  S   U5      nU R                  U5      nU R                  XS9nU R                  UUUUUS9n	U	S   nU(       d	  U4U	SS -   $ [        UU	R                  U	R                  S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.

Example:

```python
>>> from transformers import AutoProcessor, HubertModel
>>> from datasets import load_dataset
>>> import soundfile as sf

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(batch):
...     speech, _ = sf.read(batch["file"])
...     batch["speech"] = speech
...     return batch


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```Nr   r   )r   )r   r   r   r   r   )last_hidden_staterD   
attentions)r=   r   r   use_return_dictr   rB   r   r   r   _mask_hidden_statesr   r	   rD   r   )
r<   rv   r   r   r   r   r   extract_featuresrD   encoder_outputss
             r?   rE   HubertModel.forward   s)   J 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;%!DDEUE[E[\]E^`noN//0@A000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rG   )r=   r   r   r   r{   )NNNNN)rI   rJ   rK   rL   r   r%   r   r   r   r   r   FloatTensorr   r   r   r	   rE   rM   rN   rO   s   @r?   r   r      s    | &66 269=,0/3&*F
u||,F
 !.F
 $E$5$56	F

 $D>F
 'tnF
 d^F
 
uo%	&F
 F
rG   r   c                       \ rS rSrSrg)HubertForCTCi  rR   NrS   rR   rG   r?   r   r     rT   rG   r   c                       \ rS rSrSrg)r   i  rR   NrS   rR   rG   r?   r   r     rT   rG   r   )r   r   r   rs   )(typingr   r   r   r   torch.nnr&   activationsr   integrations.deepspeedr   modeling_outputsr	   modeling_utilsr
   r/   r   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   configuration_hubertr   _HIDDEN_STATES_START_POSITIONModuler   r9   rV   rY   rm   rp   rs   r   r   r   __all__rR   rG   r?   <module>r      s    ) )   ! @ / - #   / !" /BII /d	- 		1 	bii $	O 		#A 	 BO B BJ`
-!6 `
F	> 		&G 	 frG   