ó
    fT–hr ã                   ó<  • S SK JrJrJrJr  S SKrS SKrS SKJ	r	  SSK
Jr  SSKJrJrJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJr  SS
KJrJrJrJrJr  SSK J!r!J"r"  SSK#J$r$J%r%  SSK&J'r'  SSK(J)r)J*r*J+r+J,r,  SSK-J.r.  \+" 5       (       a  S SK/J0r0  SSK1J2r2  \,Rf                  " \45      r5 " S S\	Rl                  5      r7 " S S\	Rl                  5      r8S\Rr                  S\:S\Rr                  4S jr; SCS\	Rl                  S\Rr                  S\Rr                  S\Rr                  S\\Rr                     S \<S!\<4S" jjr=S# r>SDS$ jr? " S% S&\	Rl                  5      r@ " S' S(\	Rl                  5      rA " S) S*\5      rB " S+ S,\5      rC\) " S- S.\%5      5       rD " S/ S0\D5      rE\) " S1 S2\D5      5       rF  SES3\\:\:4   S4\<S5\:S\\RŽ                     S6\:S\R                  4S7 jjrI\) " S8 S9\D5      5       rJS:\Rr                  S;\:S<\:4S= jrK\)" S>S?9 " S@ SA\D\5      5       rL/ SBQrMg)Fé    )ÚCallableÚOptionalÚTupleÚUnionNé   )ÚACT2FN)ÚCacheÚDynamicCacheÚEncoderDecoderCache)ÚGenerationMixin)ÚAttentionMaskConverterÚ_prepare_4d_attention_maskÚ#_prepare_4d_attention_mask_for_sdpa)ÚFlashAttentionKwargs)ÚGradientCheckpointingLayer)ÚBaseModelOutputÚBaseModelOutputWithPastÚ)BaseModelOutputWithPastAndCrossAttentionsÚSeq2SeqLMOutputÚSeq2SeqModelOutput)ÚROPE_INIT_FUNCTIONSÚdynamic_rope_update)ÚALL_ATTENTION_FUNCTIONSÚPreTrainedModel)ÚUnpack)Úauto_docstringÚcan_return_tupleÚis_torch_flex_attn_availableÚloggingé   )ÚMoonshineConfig)Ú	BlockMask)Úmake_flex_block_causal_maskc                   ób   ^ • \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )ÚMoonshineEncoderMLPé<   c                 ó
  >• [         TU ]  5         Xl        [        U   U l        [
        R                  " UR                  UR                  5      U l	        [
        R                  " UR                  UR                  5      U l
        g ©N©ÚsuperÚ__init__Úconfigr   Úactivation_fnÚnnÚLinearÚhidden_sizeÚintermediate_sizeÚfc1Úfc2©Úselfr,   Ú
hidden_actÚ	__class__s      €Úh/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/moonshine/modeling_moonshine.pyr+   ÚMoonshineEncoderMLP.__init__=   s\   ø€ Ü‰ÑÔØŒÜ# JÑ/ˆÔÜ—9’9˜V×/Ñ/°×1IÑ1IÓJˆŒÜ—9’9˜V×5Ñ5°v×7IÑ7IÓJˆó    Úhidden_statesÚreturnc                 ól   • U R                  U5      nU R                  U5      nU R                  U5      nU$ r(   )r2   r-   r3   )r5   r;   s     r8   ÚforwardÚMoonshineEncoderMLP.forwardD   s4   € ØŸ™ Ó/ˆØ×*Ñ*¨=Ó9ˆØŸ™ Ó/ˆØÐr:   ©r-   r,   r2   r3   ©
Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__r+   ÚtorchÚTensorr>   Ú__static_attributes__Ú__classcell__©r7   s   @r8   r%   r%   <   s)   ø† õKð U§\¡\ð °e·l±l÷ ò r:   r%   c                   ób   ^ • \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )ÚMoonshineDecoderMLPéK   c                 ó  >• [         TU ]  5         Xl        [        U   U l        [
        R                  " UR                  UR                  S-  5      U l	        [
        R                  " UR                  UR                  5      U l
        g )Né   r)   r4   s      €r8   r+   ÚMoonshineDecoderMLP.__init__L   sa   ø€ Ü‰ÑÔØŒÜ# JÑ/ˆÔÜ—9’9˜V×/Ñ/°×1IÑ1IÈAÑ1MÓNˆŒÜ—9’9˜V×5Ñ5°v×7IÑ7IÓJˆr:   r;   r<   c                 ó–   • U R                  U5      nUR                  SSS9u  pU R                  U5      U-  nU R                  U5      nU$ )NrO   éÿÿÿÿ©Údim)r2   Úchunkr-   r3   )r5   r;   Úgates      r8   r>   ÚMoonshineDecoderMLP.forwardS   sQ   € ØŸ™ Ó/ˆØ+×1Ñ1°!¸Ð1Ð<ÑˆØ×*Ñ*¨4Ó0°=Ñ@ˆØŸ™ Ó/ˆØÐr:   r@   rA   rJ   s   @r8   rL   rL   K   s)   ø† õKð U§\¡\ð °e·l±l÷ ò r:   rL   r;   Ún_repr<   c                 ó    • U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )zÈ
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r    N)ÚshapeÚexpandÚreshape)r;   rX   ÚbatchÚnum_key_value_headsÚslenÚhead_dims         r8   Ú	repeat_kvra   [   s_   € ð
 2?×1DÑ1DÑ.€E ØƒzØÐØ!¢!¢Q¨ªa²Ð"2Ñ3×:Ñ:¸5ÐW\ÐdlÓm€MØ× Ñ  ¸eÑ(CÀTÓTÐTr:   ÚmoduleÚqueryÚkeyÚvalueÚattention_maskÚscalingÚdropoutc                 ó@  • [        X R                  5      n[        X0R                  5      n	[        R                  " XR	                  SS5      5      U-  n
Ub"  US S 2S S 2S S 2S UR
                  S   24   nX«-   n
[        R                  R                  U
S[        R                  S9R                  UR                  5      n
[        R                  R                  X¦U R                  S9n
[        R                  " X©5      nUR	                  SS5      R                  5       nXÊ4$ )NrO   r   éþÿÿÿrR   )rT   Údtype)ÚpÚtrainingr    )ra   Únum_key_value_groupsrF   ÚmatmulÚ	transposerZ   r.   Ú
functionalÚsoftmaxÚfloat32Útork   rh   rm   Ú
contiguous)rb   rc   rd   re   rf   rg   rh   ÚkwargsÚ
key_statesÚvalue_statesÚattn_weightsÚcausal_maskÚattn_outputs                r8   Úeager_attention_forwardr|   g   sö   € ô ˜3× ;Ñ ;Ó<€JÜ˜U×$?Ñ$?Ó@€Lä—<’< ×';Ñ';¸A¸qÓ'AÓBÀWÑL€LØÑ!Ø$¢Qªª1Ð.D°
×0@Ñ0@ÀÑ0DÐ.DÐ%DÑEˆØ#Ñ1ˆä—=‘=×(Ñ(¨¸2ÄUÇ]Á]Ð(ÐS×VÑVÐW\×WbÑWbÓc€LÜ—=‘=×(Ñ(¨È6Ï?É?Ð(Ð[€LÜ—,’,˜|Ó:€KØ×'Ñ'¨¨1Ó-×8Ñ8Ó:€KàÐ$Ð$r:   c                 óx   • U SSSS24   nU SSSS24   n[         R                  " U* U4SS9R                  S5      $ )	z*Rotates half the hidden dims of the input..r   NrO   r    rR   rS   rj   )rF   ÚstackÚflatten)ÚxÚx1Úx2s      r8   Úrotate_halfrƒ      sJ   € à	
ˆ31ˆ9‰€BØ	
ˆ31ˆ9‰€BÜ;Š;˜˜˜Ry bÑ)×1Ñ1°"Ó5Ð5r:   c                 óî  • UR                  U5      nUR                  U5      nUSSUR                  S   S-  24   R                  SSS9nUSSUR                  S   S-  24   R                  SSS9nUR                  S   nU SSU24   U SUS24   p‡USSU24   USUS24   p©Xr-  [        U5      U-  -   nX’-  [        U	5      U-  -   n[        R
                  " X¸/SS9n[        R
                  " XÊ/SS9nX¼4$ )a—  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    position_ids (`torch.Tensor`, *optional*):
        Deprecated and unused.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
.NrR   rO   rS   )Ú	unsqueezerZ   Úrepeat_interleaverƒ   rF   Úcat)ÚqÚkÚcosÚsinÚposition_idsÚunsqueeze_dimÚ
rotary_dimÚq_rotÚq_passÚk_rotÚk_passÚq_embedÚk_embeds                r8   Úapply_rotary_pos_embr•   ˆ   s6  € ð( -‰-˜Ó
&€CØ
-‰-˜Ó
&€Cð ˆcÐ'S—Y‘Y˜r‘] aÑ'Ð'Ð'Ñ
(×
:Ñ
:¸1À"Ð
:Ð
E€CØ
ˆcÐ'S—Y‘Y˜r‘] aÑ'Ð'Ð'Ñ
(×
:Ñ
:¸1À"Ð
:Ð
E€Cð —‘˜2‘€JØc˜;˜J˜;Ð&Ñ'¨¨3°
±Ð+;Ñ)<ˆ6Øc˜;˜J˜;Ð&Ñ'¨¨3°
±Ð+;Ñ)<ˆ6ð ‰{œ{¨5Ó1°CÑ7Ñ8€GØ‰{œ{¨5Ó1°CÑ7Ñ8€Gô iŠi˜Ð)¨rÑ2€GÜiŠi˜Ð)¨rÑ2€GØÐÐr:   c                   ó|  ^ • \ rS rSrSrS\S\S\S\S\4
U 4S jjr     SS	\	R                  S
\\\	R                  \	R                  4      S\\	R                     S\\   S\\	R                     S\\	R                     S\\   S\\	R                  \\	R                     \\\	R                        4   4S jjrSrU =r$ )ÚMoonshineAttentioné²   z=Multi-headed attention from 'Attention Is All You Need' paperr,   Ú	layer_idxÚ	is_causalÚnum_attention_headsr^   c                 ó  >• [         TU ]  5         UR                  XES.5        Xl        X l        [        USUR                  UR                  -  5      U l        UR                  UR                  -  U l
        U R                  S-  U l        UR                  U l        X0l        [        R                  " UR                  UR                  U R                  -  UR                   S9U l        [        R                  " UR                  UR                  U R                  -  UR                   S9U l        [        R                  " UR                  UR                  U R                  -  UR                   S9U l        [        R                  " UR                  U R                  -  UR                  SS9U l        U R                  R*                  bA  U R                  R*                  nX`R                  U-   S-
  U-  -  nXpR                  -
  U l        g SU l        g )N)r›   r^   r`   g      à¿©ÚbiasFr    r   )r*   r+   Úupdater,   r™   Úgetattrr0   r›   r`   r^   rn   rg   Úattention_dropoutrš   r.   r/   Úattention_biasÚq_projÚk_projÚv_projÚo_projÚpad_head_dim_to_multiple_ofÚhead_dim_padding)	r5   r,   r™   rš   r›   r^   Útarget_multipleÚtarget_head_dimr7   s	           €r8   r+   ÚMoonshineAttention.__init__µ   s¥  ø€ ô 	‰ÑÔØ‰Ð.AÑnÔoØŒØ"ŒÜ ¨
°F×4FÑ4FÈ&×JdÑJdÑ4dÓeˆŒØ$*×$>Ñ$>À&×B\ÑB\Ñ$\ˆÔ!Ø—}‘} dÑ*ˆŒØ!'×!9Ñ!9ˆÔØ"Œä—i’iØ×Ñ × :Ñ :¸T¿]¹]Ñ JÐQW×QfÑQfñ
ˆŒô —i’iØ×Ñ × :Ñ :¸T¿]¹]Ñ JÐQW×QfÑQfñ
ˆŒô —i’iØ×Ñ × :Ñ :¸T¿]¹]Ñ JÐQW×QfÑQfñ
ˆŒô —i’i × :Ñ :¸T¿]¹]Ñ JÈF×L^ÑL^ÐejÑkˆŒð ;‰;×2Ñ2Ñ>Ø"Ÿk™k×EÑEˆOØ-·-±-À/Ñ2QÐTUÑ2UÐZiÑ1iÑjˆOØ$3·m±mÑ$CˆDÕ!à$%ˆDÕ!r:   r;   Úposition_embeddingsrf   Úpast_key_valueÚcache_positionÚkey_value_statesrv   r<   c                 ó  • UR                   S S u  p‰U R                  U5      R                  X‰U R                  R                  U R
                  5      R                  SS5      n
US LnUb^  UR                  R                  U R                  5      nU(       a&  SUR                  U R                  '   UR                  nOUR                  nUb  UOUnU(       aA  U(       a:  W(       a3  UR                  U R                     nUR                  U R                     nOÍU R                  U5      R                  USU R                  R                  U R
                  5      R                  SS5      nU R                  U5      R                  USU R                  R                  U R
                  5      R                  SS5      nU(       a$  Ub!  UR!                  XïU R                  SU05      u  pïU(       d<  Uu  nn[#        X®UU5      u  p®Ub%  UUUS.nUR!                  XïU R                  U5      u  pï[$        nU R                  R&                  S:w  ad  U R                  R&                  S:X  a-  UR                  S	S
5      (       a  [(        R+                  S5        O[,        U R                  R&                     nU R.                  (       a  Uc  U	S:”  a  SOS
nU R0                  S:”  a¢  [2        R4                  R6                  R9                  U
SU R0                  45      n
[2        R4                  R6                  R9                  USU R0                  45      n[2        R4                  R6                  R9                  USU R0                  45      nU" U U
UUU4U R:                  (       d  SOU R<                  U R>                  US.UD6u  nnU R0                  S:”  a  USS U R0                  * 24   nURA                  X‰S5      RC                  5       nU RE                  U5      nUU4$ )NrR   r    rO   Tr®   )r‹   rŠ   r®   ÚeagerÚsdpaÚoutput_attentionsFzã`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.r   ç        )rh   rg   rš   .)#rZ   r£   Úviewr,   r^   r`   rp   Ú
is_updatedÚgetr™   Úcross_attention_cacheÚself_attention_cacheÚ	key_cacheÚvalue_cacher¤   r¥   rŸ   r•   r|   Ú_attn_implementationÚloggerÚwarning_oncer   rš   r¨   rF   r.   rq   Úpadrm   r¡   rg   r\   ru   r¦   )r5   r;   r¬   rf   r­   r®   r¯   rv   ÚbszÚq_lenÚquery_statesÚis_cross_attentionr¶   Úcurrent_statesrw   rx   rŠ   r‹   Úcache_kwargsÚattention_interfacerš   r{   ry   s                          r8   r>   ÚMoonshineAttention.forwardÚ   s  € ð #×(Ñ(¨¨"Ð-‰
ˆð K‰K˜Ó&×+Ñ+¨C¸¿¹×8WÑ8WÐY]×YfÑYfÓg×qÑqÐrsÐuvÓwð 	ð .°TÐ9ÐØÑ%Ø'×2Ñ2×6Ñ6°t·~±~ÓFˆJÞ!à<@×)Ñ)¨$¯.©.Ñ9Ø!/×!EÑ!E‘à!/×!DÑ!Dð .>Ñ-IÑ)È}ˆÞ¦.¶ZØ'×1Ñ1°$·.±.ÑAˆJØ)×5Ñ5°d·n±nÑE‰Lð —‘˜NÓ+ß‘c˜2˜tŸ{™{×>Ñ>ÀÇÁÓNß‘˜1˜a“ð ð —‘˜NÓ+ß‘c˜2˜tŸ{™{×>Ñ>ÀÇÁÓNß‘˜1˜a“ð ö
 " nÑ&@Ø+9×+@Ñ+@Ø¨d¯n©nÐ?OÐQ_Ð>`ó,Ñ(
ö "Ø*‰HˆCÜ';¸LÐVYÐ[^Ó'_Ñ$ˆLàÑ)Ø'*°3È.ÑYØ+9×+@Ñ+@Ø¨d¯n©n¸ló,Ñ(
ô )@ÐØ;‰;×+Ñ+¨wÓ6Ø{‰{×/Ñ/°6Ó9¸f¿j¹jÐI\Ð^c×>dÑ>dÜ×#Ñ#ðLõô
 '>¸d¿k¹k×>^Ñ>^Ñ&_Ð#à ŸNŸN¨~Ñ/EÈ%ÐRSË)‘DÐY^ˆ	à× Ñ  1Ó$Ü Ÿ8™8×.Ñ.×2Ñ2°<À!ÀT×EZÑEZÐA[Ó\ˆLÜŸ™×,Ñ,×0Ñ0°¸aÀ×AVÑAVÐ=WÓXˆJÜ Ÿ8™8×.Ñ.×2Ñ2°<À!ÀT×EZÑEZÐA[Ó\ˆLá$7ØØØØØð
%
ð  $Ÿ}Ÿ}‘C°$×2HÑ2HØ—L‘LØñ
%
ð ñ
%
Ñ!ˆ\ð × Ñ  1Ó$Ø% cÐ+C¨d×.CÑ.CÐ-CÐ+CÐ&CÑDˆKà!×)Ñ)¨#°bÓ9×DÑDÓFˆØ—k‘k +Ó.ˆØ˜LÐ(Ð(r:   )r¡   r,   r`   r¨   rš   r¤   r™   rn   r¦   r£   rg   r¥   )NNNNN)rB   rC   rD   rE   Ú__doc__r!   ÚintÚboolr+   rF   rG   r   r   r	   Ú
LongTensorr   r   r>   rH   rI   rJ   s   @r8   r—   r—   ²   s  ø† ÙGð#&àð#&ð ð#&ð ð	#&ð
 !ð#&ð !÷#&ðP LPØ15Ø*.Ø59Ø37ñ[)à—|‘|ð[)ð & e¨E¯L©L¸%¿,¹,Ð,FÑ&GÑHð[)ð ! §¡Ñ.ð	[)ð
 ! ™ð[)ð ! ×!1Ñ!1Ñ2ð[)ð # 5§<¡<Ñ0ð[)ð Ð-Ñ.ð[)ð 
ˆu|‰|˜X e§l¡lÑ3°X¸eÀEÇLÁLÑ>QÑ5RÐRÑ	S÷[)ó [)r:   r—   c                   ól   ^ • \ rS rSrSS\4U 4S jjjr\R                  " 5       \S 5       5       r	Sr
U =r$ )ÚMoonshineRotaryEmbeddingi8  r,   c                 óì  >• [         TU ]  5         [        US5      (       aH  UR                  b;  UR                  R	                  SUR                  R	                  S5      5      U l        OSU l        UR                  U l        UR                  U l        Xl	        [        U R
                     U l        U R                  U R                  U5      u  o0l        U R                  SUSS9  U R                  U l        g )NÚrope_scalingÚ	rope_typeÚtypeÚdefaultÚinv_freqF)Ú
persistent)r*   r+   ÚhasattrrÏ   r·   rÐ   Úmax_position_embeddingsÚmax_seq_len_cachedÚoriginal_max_seq_lenr,   r   Úrope_init_fnÚattention_scalingÚregister_bufferrÓ   Úoriginal_inv_freq)r5   r,   ÚdevicerÓ   r7   s       €r8   r+   Ú!MoonshineRotaryEmbedding.__init__9  sÈ   ø€ Ü‰ÑÔä6˜>×*Ñ*¨v×/BÑ/BÑ/NØ#×0Ñ0×4Ñ4°[À&×BUÑBU×BYÑBYÐZ`ÓBaÓbˆDNà&ˆDŒNØ"(×"@Ñ"@ˆÔØ$*×$BÑ$BˆÔ!àŒÜ/°·±Ñ?ˆÔà+/×+<Ñ+<¸T¿[¹[È&Ó+QÑ(ˆÔ(Ø×Ñ˜Z¨¸eÐÑDØ!%§¡ˆÕr:   c                 ób  • U R                   S S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        R                  " USS9   UR                  5       UR                  5       -  R                  SS5      n[        R                  " Xf4SS	9nUR                  5       U R                  -  nUR                  5       U R                  -  n	S S S 5        WR	                  UR                   S
9W	R	                  UR                   S
94$ ! , (       d  f       N@= f)Nr   rR   r    ÚmpsÚcpuF)Údevice_typeÚenabledrO   rS   ©rk   )rÓ   Úfloatr[   rZ   rt   rÝ   Ú
isinstancerÑ   ÚstrrF   Úautocastrp   r‡   rŠ   rÚ   r‹   rk   )
r5   r€   rŒ   Úinv_freq_expandedÚposition_ids_expandedrâ   ÚfreqsÚembrŠ   r‹   s
             r8   r>   Ú MoonshineRotaryEmbedding.forwardJ  sR  € ð !ŸM™M¨$²°4¨-Ñ8×>Ñ>Ó@×GÑGÈ×HZÑHZÐ[\ÑH]Ð_aÐcdÓe×hÑhÐij×iqÑiqÓrÐØ ,ªQ°²a¨ZÑ 8× >Ñ >Ó @Ðä'1°!·(±(·-±-Ä×'EÑ'EÈ!Ï(É(Ï-É-Ð[`ÓJ`a—h‘h—m’mÐfkˆÜ^Š^¨¸UÓCØ&×,Ñ,Ó.Ð1F×1LÑ1LÓ1NÑN×YÑYÐZ[Ð]^Ó_ˆEÜ—)’)˜U˜N°Ñ3ˆCØ—'‘'“)˜d×4Ñ4Ñ4ˆCØ—'‘'“)˜d×4Ñ4Ñ4ˆC÷	 Dð v‰v˜AŸG™GˆvÐ$ c§f¡f°1·7±7 fÐ&;Ð;Ð;÷ DÕCús   Ã$BF Æ 
F.)rÚ   r,   r×   rÜ   rØ   rÙ   rÐ   r(   )rB   rC   rD   rE   r!   r+   rF   Úno_gradr   r>   rH   rI   rJ   s   @r8   rÍ   rÍ   8  s6   ø† ñ/˜÷ /ð /ð" ‡]‚]ƒ_Øñ<ó ó ö<r:   rÍ   c                   ó€  ^ • \ rS rSrS\S\4U 4S jjr       SS\R                  S\	\R                     S\	\R                     S\	\   S	\	\   S
\	\   S\	\R                     S\	\\R                  \R                  4      S\\   S\\R                   \	\\R                   \R                   4      4   4S jjrSrU =r$ )ÚMoonshineEncoderLayeriZ  r,   r™   c                 óT  >• [         TU ]  5         UR                  U l        [        UUSUR                  UR
                  S9U l        [        XR                  5      U l	        [        R                  " UR                  SS9U l        [        R                  " UR                  SS9U l        g )NF©r,   r™   rš   r›   r^   r   )r*   r+   r0   r—   Úencoder_num_attention_headsÚencoder_num_key_value_headsÚ	self_attnr%   Úencoder_hidden_actÚmlpr.   Ú	LayerNormÚinput_layernormÚpost_attention_layernorm©r5   r,   r™   r7   s      €r8   r+   ÚMoonshineEncoderLayer.__init__[  s‰   ø€ Ü‰ÑÔØ!×-Ñ-ˆÔä+ØØØØ &× BÑ BØ &× BÑ Bñ
ˆŒô ' v×/HÑ/HÓIˆŒÜ!Ÿ|š|¨F×,>Ñ,>ÀUÑKˆÔÜ(*¯ª°V×5GÑ5GÈeÑ(TˆÕ%r:   r;   rf   rŒ   r­   r³   Ú	use_cacher®   r¬   rv   r<   c	                 óÜ   • Un
U R                  U5      nU R                  " SUUUUUUUUS.U	D6u  pX¡-   nUn
U R                  U5      nU R                  U5      nX¡-   nU4nU(       a  XË4-  nU$ )N©r;   rf   rŒ   r­   r³   rý   r®   r¬   © )rù   rõ   rú   r÷   )r5   r;   rf   rŒ   r­   r³   rý   r®   r¬   rv   ÚresidualÚself_attn_weightsÚoutputss                r8   r>   ÚMoonshineEncoderLayer.forwardk  s¥   € ð !ˆØ×,Ñ,¨]Ó;ˆð ,0¯>ª>ð 
,
Ø'Ø)Ø%Ø)Ø/ØØ)Ø 3ñ
,
ð ñ
,
Ñ(ˆð !Ñ0ˆð !ˆØ×5Ñ5°mÓDˆØŸ™ Ó/ˆØ Ñ0ˆà Ð"ˆÞØÐ+Ñ+ˆGàˆr:   )r0   rù   r÷   rú   rõ   )NNNFFNN)rB   rC   rD   rE   r!   rÉ   r+   rF   rG   r   rË   r	   rÊ   r   r   r   ÚFloatTensorr>   rH   rI   rJ   s   @r8   rð   rð   Z  s  ø† ðU˜ð U¸3÷ Uð& 26Ø37Ø*.Ø,1Ø$)Ø59ØKOñ'à—|‘|ð'ð ! §¡Ñ.ð'ð ˜u×/Ñ/Ñ0ð	'ð
 ! ™ð'ð $ D™>ð'ð ˜D‘>ð'ð ! ×!1Ñ!1Ñ2ð'ð & e¨E¯L©L¸%¿,¹,Ð,FÑ&GÑHð'ð Ð-Ñ.ð'ð 
ˆu× Ñ  (¨5°×1BÑ1BÀE×DUÑDUÐ1UÑ+VÑ"WÐWÑ	X÷'ó 'r:   rð   c                    ó  ^ • \ rS rSrSS\S\\   4U 4S jjjr           SS\R                  S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\   S\\   S\\   S\\R                     S\\\R                  \R                  4      S\\\R                  \R                  4      S\\R                  \\\R                  \R                  4      4   4S jjrSrU =r$ )ÚMoonshineDecoderLayeri•  r,   r™   c                 óè  >• [         TU ]  5         UR                  U l        [        UUSUR                  UR
                  S9U l        [        UUSUR                  UR
                  S9U l        [        XR                  5      U l
        [        R                  " UR                  SS9U l        [        R                  " UR                  SS9U l        [        R                  " UR                  SS9U l        g )NTrò   Fr   )r*   r+   r0   r—   Údecoder_num_attention_headsÚdecoder_num_key_value_headsrõ   Úencoder_attnrL   Údecoder_hidden_actr÷   r.   rø   rù   rú   Úfinal_layernormrû   s      €r8   r+   ÚMoonshineDecoderLayer.__init__–  sÌ   ø€ Ü‰ÑÔØ!×-Ñ-ˆÔä+ØØØØ &× BÑ BØ &× BÑ Bñ
ˆŒô /ØØØØ &× BÑ BØ &× BÑ Bñ
ˆÔô ' v×/HÑ/HÓIˆŒÜ!Ÿ|š|¨F×,>Ñ,>ÀUÑKˆÔÜ(*¯ª°V×5GÑ5GÈeÑ(TˆÔ%Ü!Ÿ|š|¨F×,>Ñ,>ÀUÑKˆÕr:   r;   rf   Úencoder_hidden_statesÚencoder_attention_maskrŒ   Úencoder_position_idsr­   r³   rý   r®   r¬   Úencoder_position_embeddingsr<   c                 óF  • UnU R                  U5      nU R                  " SUUUUUU	U
US.UD6u  pXá-   nS nUb.  UnU R                  U5      nU R                  UUUUUU	S9u  nnXá-   nUnU R	                  U5      nU R                  U5      nXá-   nU4nU(       a  UUU4-  nU$ )Nrÿ   )r;   r¯   rf   r­   r³   rý   r   )rù   rõ   rú   r  r  r÷   )r5   r;   rf   r  r  rŒ   r  r­   r³   rý   r®   r¬   r  rv   r  r  Úcross_attn_weightsr  s                     r8   r>   ÚMoonshineDecoderLayer.forward®  s  € ð  !ˆà×,Ñ,¨]Ó;ˆð ,0¯>ª>ð 
,
Ø'Ø)Ø%Ø)Ø/ØØ)Ø 3ñ
,
ð ñ
,
Ñ(ˆð !Ñ0ˆð "ÐØ Ñ,Ø$ˆHØ ×9Ñ9¸-ÓHˆMØ04×0AÑ0AØ+Ø!6Ø5Ø-Ø"3Ø#ð 1Bð 1Ñ-ˆMÐ-ð %Ñ4ˆMð !ˆØ×,Ñ,¨]Ó;ˆØŸ™ Ó/ˆØ Ñ0ˆà Ð"ˆæØÐ)Ð+=Ð>Ñ>ˆGàˆr:   )r  r  r0   rù   r÷   rú   rõ   r(   )NNNNNNFFNNN)rB   rC   rD   rE   r!   r   rÉ   r+   rF   rG   rË   r	   rÊ   r   r  r>   rH   rI   rJ   s   @r8   r  r  •  su  ø† ñL˜ð L¸8ÀC¹=÷ Lð Lð6 26Ø8<Ø9=Ø37Ø;?Ø*.Ø,1Ø$)Ø59ØKOØSWñ<à—|‘|ð<ð ! §¡Ñ.ð<ð  (¨¯©Ñ5ð	<ð
 !)¨¯©Ñ 6ð<ð ˜u×/Ñ/Ñ0ð<ð ' u×'7Ñ'7Ñ8ð<ð ! ™ð<ð $ D™>ð<ð ˜D‘>ð<ð ! ×!1Ñ!1Ñ2ð<ð & e¨E¯L©L¸%¿,¹,Ð,FÑ&GÑHð<ð &.¨e°E·L±LÀ%Ç,Á,Ð4NÑ.OÑ%Pð<ð 
ˆu× Ñ  (¨5°×1BÑ1BÀE×DUÑDUÐ1UÑ+VÑ"WÐWÑ	X÷<ó <r:   r  c                   ód   • \ rS rSr\rSrSrSrSS/r	Sr
SrSrSrS rS\R                   4S	 jrS
rg)ÚMoonshinePreTrainedModelií  ÚmodelÚinput_valuesTrð   r  c                 óP  • U R                   R                  n[        U[        R                  [        R
                  45      (       aW  UR                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         g g [        U[        R                  [        R                  45      (       aX  UR                  R                  R                  S5        UR                  b%  UR                  R                  R                  5         g g [        U[        R                  5      (       ad  UR                  R                  R                  SUS9  UR                  b2  UR                  R                  UR                     R                  5         g g g )Nr´   )ÚmeanÚstdg      ð?)r,   Úinitializer_rangeræ   r.   r/   ÚConv1dÚweightÚdataÚnormal_rž   Úzero_Ú	GroupNormrø   Úfill_Ú	EmbeddingÚpadding_idx)r5   rb   r  s      r8   Ú_init_weightsÚ&MoonshinePreTrainedModel._init_weightsù  s)  € Øk‰k×+Ñ+ˆÜfœrŸy™y¬"¯)©)Ð4×5Ñ5ØM‰M×Ñ×&Ñ&¨C°SÐ&Ñ9Ø{‰{Ñ&Ø—‘× Ñ ×&Ñ&Õ(ð 'ä˜¤§¡¬r¯|©|Ð <×=Ñ=ØM‰M×Ñ×$Ñ$ SÔ)Ø{‰{Ñ&Ø—‘× Ñ ×&Ñ&Õ(ð 'ä˜¤§¡×-Ñ-ØM‰M×Ñ×&Ñ&¨C°SÐ&Ñ9Ø×!Ñ!Ñ-Ø—‘×"Ñ" 6×#5Ñ#5Ñ6×<Ñ<Õ>ð .ð .r:   Úinput_lengthsc                 ó~   • [        US-
  S-  S-   5      n[        US-
  S-  S-   5      n[        US-
  S-  S-   5      nU$ )z8
Computes the output length of the convolutional layers
é   é@   r    é   r   rO   )rÉ   )r5   r)  Úoutput_conv1_lengthÚoutput_conv2_lengthÚoutput_conv3_lengths        r8   Ú _get_feat_extract_output_lengthsÚ9MoonshinePreTrainedModel._get_feat_extract_output_lengths  sZ   € ô " =°3Ñ#6¸"Ñ"<¸qÑ"@ÓAÐÜ!Ð#6¸Ñ#:¸aÑ"?À!Ñ"CÓDÐÜ!Ð#6¸Ñ#:¸aÑ"?À!Ñ"CÓDÐà"Ð"r:   r   N)rB   rC   rD   rE   r!   Úconfig_classÚbase_model_prefixÚmain_input_nameÚsupports_gradient_checkpointingÚ_no_split_modulesÚ_supports_flash_attn_2Ú_supports_sdpaÚ_supports_cache_classÚ_supports_static_cacher'  rF   rË   r1  rH   r   r:   r8   r  r  í  sR   † à"€LØÐØ$€OØ&*Ð#Ø0Ð2IÐJÐØ!ÐØ€NØ ÐØ!Ðò?ð#¸e×>NÑ>N÷ #r:   r  c                   óú   ^ • \ rS rSrSrSrS\4U 4S jjrS\R                  4S jr
S\R                  4S	 jr\    SS\\R                     S
\\R                      S\\   S\\   S\\   S\4S jj5       rSrU =r$ )ÚMoonshineEncoderi  z“
Transformer encoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MoonshineEncoderLayer`]

Args:
    config: MoonshineConfig
r  r,   c           	      óL  >• [         TU ]  U5        Xl        UR                  n[        R
                  " SUSSSS9U l        [        R
                  " USU-  SSS	9U l        [        R
                  " SU-  USSS	9U l        [        R                  " SUS
S9U l
        [        US9U l        [        R                  " [        UR                  5       Vs/ s H  n[!        X5      PM     sn5      U l        [        R$                  " USS9U l        SU l        U R+                  5         g s  snf )Nr    r+  r,  F)Úkernel_sizeÚstriderž   rO   r-  r   )r?  r@  gñhãˆµøä>)Ú
num_groupsÚnum_channelsÚeps©r,   r   )r*   r+   r,   r0   r.   r  Úconv1Úconv2Úconv3r#  Ú	groupnormrÍ   Ú
rotary_embÚ
ModuleListÚrangeÚencoder_num_hidden_layersrð   Úlayersrø   Ú
layer_normÚgradient_checkpointingÚ	post_init)r5   r,   Ú	embed_dimÚidxr7   s       €r8   r+   ÚMoonshineEncoder.__init__  sî   ø€ Ü‰Ñ˜Ô ØŒØ×&Ñ&ˆ	ä—Y’Y˜q )¸ÀRÈeÑTˆŒ
Ü—Y’Y˜y¨!¨i©-ÀQÈqÑQˆŒ
Ü—Y’Y˜q 9™}¨iÀQÈqÑQˆŒ
ÜŸš°ÀÐPTÑUˆŒä2¸&ÑAˆŒä—m’mÜ;@À×AaÑAaÔ;bÓcÒ;b°CÔ" 6Ö/Ñ;bÑcó
ˆŒô Ÿ,š, y°uÑ=ˆŒà&+ˆÔ#Ø‰Õùò ds   ÃD!r<   c                 ó   • U R                   $ r(   ©rE  ©r5   s    r8   Úget_input_embeddingsÚ%MoonshineEncoder.get_input_embeddings1  s   € Øz‰zÐr:   re   c                 ó   • Xl         g r(   rU  ©r5   re   s     r8   Úset_input_embeddingsÚ%MoonshineEncoder.set_input_embeddings4  s   € Ø
r:   rf   r³   Úoutput_hidden_statesÚflash_attn_kwargsc           	      ó  • Ub  UOU R                   R                  nUb  UOU R                   R                  nUc  [        S5      eUR	                  S5      n[
        R                  R                  U R                  U5      5      nU R                  U5      n[
        R                  R                  U R                  U5      5      n[
        R                  R                  U R                  U5      5      nUR                  SSS5      nUb´  U R                  UR                  S   5      nSnUSSSU24   SSU24   nU R                   R                   S	:X  a  US
:H  R#                  5       (       a  UOSnOLU R                   R                   S:X  a  U(       d  [%        X&R&                  5      nO[)        X&R&                  5      n[*        R,                  " SUR                  S   UR.                  S9R	                  S5      n	U R1                  Xi5      n
U(       a  SOSnU(       a  SOSnU R2                   H3  nU(       a  X¶4-  nU" U4UU	UU
S.UD6nUS   nU(       d  M+  XÎS   4-  nM5     U R5                  U5      nU(       a  X¶4-  n[7        UUUS9$ )a\  
Args:
    input_values (`torch.FloatTensor` of shape `(batch_size, audio_length)`):
        Float values of the raw speech waveform. Raw speech waveform can be
        obtained by loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a
        `numpy.ndarray`, *e.g.* via the soundfile library (`pip install soundfile`). To prepare the array into
        `input_values`, the [`AutoFeatureExtractor`] should be used for padding
        and conversion into a tensor of type `torch.FloatTensor`.
    attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Mask to avoid performing attention on padding indices in `input_values`. Mask values selected in `[0, 1]`:
        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.
        [What are attention masks?](../glossary#attention-mask)
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
        tensors for more detail.
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
        more detail.
    return_dict (`bool`, *optional*):
        Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
NzYou must specify input_values.r    r   rO   rR   é€  .Úflash_attention_2r´   r²   ©rÝ   r   )rf   rŒ   r³   r¬   ©Úlast_hidden_stater;   Ú
attentions)r,   r³   r]  Ú
ValueErrorr…   r.   rq   ÚtanhrE  rH  ÚgelurF  rG  Úpermuter1  rZ   r¼   Úanyr   rk   r   rF   ÚarangerÝ   rI  rM  rN  r   )r5   r  rf   r³   r]  r^  r;   Úmask_lenÚdownsample_striderŒ   r¬   Úall_hidden_statesÚall_self_attnsÚencoder_layerÚlayer_outputss                  r8   r>   ÚMoonshineEncoder.forward7  s€  € ð> 2CÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐà$8Ñ$DÑ È$Ï+É+×JjÑJjð 	ð ÑÜÐ=Ó>Ð>ð $×-Ñ-¨aÓ0ˆÜŸ™×*Ñ*¨4¯:©:°lÓ+CÓDˆØŸ™ }Ó5ˆÜŸ™×*Ñ*¨4¯:©:°mÓ+DÓEˆÜŸ™×*Ñ*¨4¯:©:°mÓ+DÓEˆØ%×-Ñ-¨a°°AÓ6ˆð Ñ%Ø×<Ñ<¸^×=QÑ=QÐRTÑ=UÓVˆHØ *ÐØ+¨CÑ1DÐ3DÐ1DÐ,DÑEÀcÈ9ÈHÈ9ÀnÑUˆNØ{‰{×/Ñ/Ð3FÓFØ4BÀcÑ4I×3NÑ3N×3PÑ3P¡ÐVZ‘ð —‘×1Ñ1°VÓ;ÖDUä!DÀ^×UhÑUhÓ!i‘ô "<¸N×L_ÑL_Ó!`ä—|’| A }×':Ñ':¸1Ñ'=Àm×FZÑFZÑ[×eÑeÐfgÓhˆð #Ÿo™o¨mÓJÐö #7™B¸DÐÞ0™°dˆà!Ÿ[œ[ˆMÞ#Ø!Ð%5Ñ5Ð!á)Øðà-Ø)Ø"3Ø$7ñð $ñˆMð *¨!Ñ,ˆMç Ð Ø°Ñ#3Ð"5Ñ5’ñ! )ð$ Ÿ™¨Ó6ˆö  ØÐ!1Ñ1Ðä&Ø+Ø+Ø%ñ
ð 	
r:   )	r,   rE  rF  rG  rO  rH  rN  rM  rI  )NNNN)rB   rC   rD   rE   rÈ   r5  r!   r+   r.   ÚModulerW  r[  r   r   rF   r  rG   rÊ   r   r   r   r>   rH   rI   rJ   s   @r8   r=  r=    sÁ   ø† ñð %€Oð˜÷ ð( b§i¡iô ð¨"¯)©)ô ð ð 59Ø15Ø,0Ø/3ñc
à˜u×0Ñ0Ñ1ðc
ð ! §¡Ñ.ðc
ð $ D™>ð	c
ð
 ' t™nðc
ð $Ð$8Ñ9ðc
ð 
!ôc
ó öc
r:   r=  c                   ó\  ^ • \ rS rSrSrS\4U 4S jjrS rS r\	\
           SS\\R                     S\\R                     S\\R                     S	\\   S
\\R                      S\\   S\\   S\\   S\\R                     S\\R                      S\\R                     S\\   S\\\4   4S jj5       5       r SS\\R                  S4   S\R                  S\R                  S	\S\4
S jjr\S\R                  S\S\S\R6                  S\R                  S\4S j5       rSrU =r$ )ÚMoonshineDecoderiž  Ú	input_idsr,   c           	      ó
  >• [         TU ]  U5        UR                  U l        UR                  U l        [
        R                  " UR                  UR                  U R                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        [
        R                  " UR                  SS9U l        [!        US9U l        SU l        U R'                  5         g s  snf )NFr   rD  )r*   r+   Úpad_token_idr&  Ú
vocab_sizer.   r%  r0   Úembed_tokensrJ  rK  Údecoder_num_hidden_layersr  rM  rø   ÚnormrÍ   rI  rO  rP  )r5   r,   rR  r7   s      €r8   r+   ÚMoonshineDecoder.__init__¢  sÈ   ø€ Ü‰Ñ˜Ô Ø!×.Ñ.ˆÔØ ×+Ñ+ˆŒäŸLšL¨×):Ñ):¸F×<NÑ<NÐPT×P`ÑP`ÓaˆÔÜ—m’mÜ;@À×AaÑAaÔ;bÓcÒ;b°CÔ" 6Ö/Ñ;bÑcó
ˆŒô —L’L ×!3Ñ!3¸%Ñ@ˆŒ	Ü2¸&ÑAˆŒØ&+ˆÔ#ð 	‰Õùò ds   ÂD c                 ó   • U R                   $ r(   ©rz  rV  s    r8   rW  Ú%MoonshineDecoder.get_input_embeddings²  s   € Ø× Ñ Ð r:   c                 ó   • Xl         g r(   r  rZ  s     r8   r[  Ú%MoonshineDecoder.set_input_embeddingsµ  s   € Ø!Õr:   rf   rŒ   Úpast_key_valuesÚinputs_embedsrý   r³   r]  r®   r  r  r^  r<   c                 óÀ  • Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUSL USL-  (       a  [	        S5      eU R
                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUc  U R                  U5      nU(       a"  Uc  [        5       n[        5       n[        XÞ5      nU	cD  Ub  UR                  5       OSn[        R                  " XÿUR                  S   -   UR                   S9n	Uc  U	R#                  S5      nU R%                  X%X”U5      nUnU R'                  UU5      nU(       a  SOSnU(       a  SOSnU(       a  U
b  SOSnUbÃ  U
R                  S	   nS
nUSSSU24   SSU24   nU R                   R(                  S:X  a  US:H  R+                  5       (       a  UOSnOjU R                   R(                  S:X  a,  U(       d%  [-        UUR.                  UR                  S	   5      nO$[1        UUR.                  UR                  S	   5      nU R2                   HH  nU(       a  UU4-  nU" U4UUU
UUUUU	US.	UD6nUS   nU(       d  M1  UUS   4-  nU
c  M?  UUS   4-  nMJ     U R5                  U5      nU(       a  UU4-  n[7        UU(       a  UOSUUUS9$ )a\  
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
    Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
    of the decoder.
encoder_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding indices in `encoder_hidden_states`. Mask values selected in `[0, 1]`:
    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.
    [What are attention masks?](../glossary#attention-mask)
Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fr   r    rb  r   rj   r`  .ra  r´   r²   )	rf   r  r  rŒ   r­   r³   rý   r®   r¬   rO   )rd  rƒ  r;   re  Úcross_attentions)r,   r³   r]  rý   rf  rO  rm   r½   r¾   rz  r
   r   Úget_seq_lengthrF   rk  rZ   rÝ   r…   Ú_update_causal_maskrI  r¼   rj  r   rk   r   rM  r|  r   )r5   rv  rf   rŒ   rƒ  r„  rý   r³   r]  r®   r  r  r^  r¹   r¸   Úpast_seen_tokensrz   r;   r¬   rn  ro  Úall_cross_attentionsrl  rm  Údecoder_layerrq  s                             r8   r>   ÚMoonshineDecoder.forward¸  s;  € ð6 2CÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐà$8Ñ$DÑ È$Ï+É+×JjÑJjð 	ð "+Ñ!6‘I¸D¿K¹K×<QÑ<Qˆ	à˜Ð -°tÐ";×<ÜÐYÓZÐZà×&×&¨4¯=¯=¾YÜ×ÑØjôð ˆIàÑ Ø ×-Ñ-¨iÓ8ˆMæ˜Ñ0Ü#/£>Ð Ü$0£NÐ!Ü1Ð2FÓ^ˆOàÑ!ØCRÑC^˜×=Ñ=Ô?ÐdeÐÜ"Ÿ\š\Ø °]×5HÑ5HÈÑ5KÑ"KÐTa×ThÑThñˆNð ÑØ)×3Ñ3°AÓ6ˆLà×.Ñ.Ø¨>ÐL]ó
ˆð &ˆð #Ÿo™o¨m¸\ÓJÐö #7™B¸DÐÞ0™°dˆÞ&7Ð<QÑ<]™rÐdhÐð "Ñ-Ø,×2Ñ2°2Ñ6ˆHØ *ÐØ%;¸CÑATÐCTÐATÐ<TÑ%UÐVYÐ[dÐ\dÐ[dÐVdÑ%eÐ"Ø{‰{×/Ñ/Ð3FÓFØDZÐ^aÑDa×CfÑCf×ChÑChÑ)?ÐnrÑ&ð —‘×1Ñ1°VÓ;ÖDUä)LØ*¨M×,?Ñ,?À×ATÑATÐUWÑAXó*Ñ&ô
 *DØ*¨M×,?Ñ,?À×ATÑATÐUWÑAXó*Ð&ð "Ÿ[œ[ˆMÞ#Ø! mÐ%5Ñ5Ð!á)Øðà*Ø'=Ø&;Ø)Ø.Ø"3Ø#Ø-Ø$7ñð $ñˆMð *¨!Ñ,ˆMç Ð Ø =°Ñ#3Ð"5Ñ5à(Ó4Ø(¨]¸1Ñ-=Ð,?Ñ?Ò(ñ1 )ð4 Ÿ	™	 -Ó0ˆö  Ø -Ð!1Ñ1Ðä8Ø+Þ/8™O¸dØ+Ø%Ø1ñ
ð 	
r:   r"   Úinput_tensorc           	      óæ  • U R                   R                  S:X  a  Ub  US:H  R                  5       (       a  U$ g U R                   R                  S:X  a,  [        U[        R
                  5      (       a  [        U5      nU$ Ub  UR                  5       OSnUb  UR                  OSnU R                   R                  S:X  a5  U(       d.  U(       d'  [        R                  " UUUU R                  S9(       a  g UR                  nUR                  S   n	U(       a  UR                  5       n
O5[        U[        R
                  5      (       a  UR                  S	   OXi-   S-   n
U R                  UU	U
UUUR                  S   S
9nU R                   R                  S:X  aZ  UbW  UR                   R"                  S;   a=  U(       d6  [        R$                  " U5      R&                  n[        R(                  " X¼5      nU$ )Nra  r´   Úflex_attentionr   Fr²   )r„  Úpast_key_values_lengthÚis_trainingr    rR   )Úsequence_lengthÚtarget_lengthrk   r®   Ú
batch_size)ÚcudaÚxpuÚnpu)r,   r¼   rj  ræ   rF   rG   r#   r‡  Úis_compileabler   Ú_ignore_causal_mask_sdparm   rk   rZ   Úget_max_cache_shapeÚ5_prepare_4d_causal_attention_mask_with_cache_positionrÝ   rÑ   ÚfinfoÚminÚ_unmask_unattended)r5   rf   r  r®   rƒ  r³   r‰  Úusing_compilable_cacherk   r’  r“  rz   Ú	min_dtypes                r8   rˆ  Ú$MoonshineDecoder._update_causal_mask=  sË  € ð ;‰;×+Ñ+Ð/BÓBØÑ)¨~ÀÑ/D×.IÑ.I×.KÑ.KØ%Ð%ØØ;‰;×+Ñ+Ð/?Ó?Ü˜.¬%¯,©,×7Ñ7Ü!<¸^Ó!LØ!Ð!ð
 @OÑ?Z˜?×9Ñ9Ô;Ð`aÐØCRÑC^ ×!?Ò!?ÐdiÐð ;‰;×+Ñ+¨vÓ5Ö>TÖ]nÜ%×>Ò>ØØ*Ø'7Ø ŸM™M÷	ð à×"Ñ"ˆØ&×,Ñ,¨QÑ/ˆÞ!Ø+×?Ñ?ÓA‰Mô ˜n¬e¯l©l×;Ñ;ð ×$Ñ$ RÒ(à%Ñ7¸!Ñ;ð ð ×PÑPØØ+Ø'ØØ)Ø#×)Ñ)¨!Ñ,ð Qð 
ˆð K‰K×,Ñ,°Ó6ØÑ*Ø×%Ñ%×*Ñ*Ð.DÓDÞ%ô
 Ÿš EÓ*×.Ñ.ˆIÜ0×CÒCÀKÓ[ˆKàÐr:   r’  r“  rk   r”  c                 ó¶  • U b  U R                  5       S:X  a  U nU$ [        R                  " U5      R                  n[        R                  " X4XƒUR
                  S9nUS:w  a  [        R                  " USS9nU[        R                  " X$R
                  S9UR                  SS5      :„  -  nUSSSS2SS24   R                  USSS5      nU b‹  UR                  5       nU R                  S   n	USS2SS2SS2SU	24   U SS2SSSS24   R                  UR
                  5      -   n
U
S:H  n
USS2SS2SS2SU	24   R                  X¨5      USS2SS2SS2SU	24'   U$ )	a½  
Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
`(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

Args:
    attention_mask (`torch.Tensor`):
        A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
        `(batch_size, 1, query_length, key_value_length)`.
    sequence_length (`int`):
        The sequence length being processed.
    target_length (`int`):
        The target length: when generating with static cache, the mask should be as long as the static cache,
        to account for the 0 padding, the part of the cache that is not filled yet.
    dtype (`torch.dtype`):
        The dtype to use for the 4D attention mask.
    cache_position (`torch.Tensor`):
        Indices depicting the position of the input sequence tokens in the sequence.
    batch_size (`torch.Tensor`):
        Batch size.
Né   )Ú
fill_valuerk   rÝ   r    )Údiagonalrb  rR   r   )rT   rF   rœ  r  ÚfullrÝ   Útriurk  r\   r[   ÚclonerZ   rt   Úmasked_fill)rf   r’  r“  rk   r®   r”  rv   rz   r   Úmask_lengthÚpadding_masks              r8   r›  ÚFMoonshineDecoder._prepare_4d_causal_attention_mask_with_cache_position  s}  € ð< Ñ%¨.×*<Ñ*<Ó*>À!Ó*Cà(ˆKð* Ðô' Ÿš EÓ*×.Ñ.ˆIÜŸ*š*Ø Ð0¸YÐ\j×\qÑ\qñˆKð  !Ó#Ü#Ÿjšj¨¸qÑAØœ5Ÿ<š<¨×>SÑ>SÑTÐWe×WmÑWmÐnpÐrsÓWtÑtÑtˆKØ% d¨D²!²QÐ&6Ñ7×>Ñ>¸zÈ1ÈbÐRTÓUˆKØÑ)Ø)×/Ñ/Ó1Ø,×2Ñ2°2Ñ6Ø*ª1ªa²°L°[°LÐ+@ÑAÀNÒSTÐVZÐ\`ÒbcÐScÑDd×DgÑDgØ×&Ñ&óEñ  ð  ,¨qÑ0Ø5@ÂÂAÂqÈ,È;È,ÐAVÑ5W×5cÑ5cØ ó6šAšq¢! \ k \Ð1Ñ2ð Ðr:   )rz  rO  rM  r|  r&  rI  ry  )NNNNNNNNNNN)F)rB   rC   rD   rE   r5  r!   r+   rW  r[  r   r   r   rF   rË   rG   r	   r  rÊ   r   r   r   r   r   r>   rˆ  ÚstaticmethodrÉ   rk   r›  rH   rI   rJ   s   @r8   ru  ru  ž  s  ø† à!€Oð˜÷ ò !ò"ð Øð 15Ø15Ø37Ø+/Ø59Ø$(Ø,0Ø/3Ø59Ø=AØ9=ñA
à˜E×,Ñ,Ñ-ðA
ð ! §¡Ñ.ðA
ð ˜u×/Ñ/Ñ0ð	A
ð
 " %™ðA
ð   × 1Ñ 1Ñ2ðA
ð ˜D‘>ðA
ð $ D™>ðA
ð ' t™nðA
ð ! ×!1Ñ!1Ñ2ðA
ð  (¨×(9Ñ(9Ñ:ðA
ð !)¨¯©Ñ 6ðA
ð $Ð$8Ñ9ðA
ð 
ˆuÐ-Ð-Ñ	.ôA
ó ó ðA
ðR #(ñBà˜eŸl™l¨KÐ7Ñ8ðBð —l‘lðBð Ÿ™ð	Bð
 ðBð  õBðH ð4ØŸ™ð4àð4ð ð4ð {‰{ð	4ð
 Ÿ™ð4ð ó4ó ö4r:   ru  rZ   Ú	mask_probrª  Ú	min_masksc           	      óê  ^^^^^• U u  nmTS:  a  [        S5      eTT:”  a  [        ST ST S35      e[        R                  R                  S5      R	                  5       mUUUUU4S jnUb-  UR                  5       R                  S5      R                  5       O[        U5       Vs/ s H  nTPM     snn[        R                  " UT4[        S	9n	/ n
U" T5      nUS
:X  a  U	$ U H­  nU" U5      n[        R                  R                  [        R                  " UTS-
  -
  5      USS9n[        U5      S
:X  a  TS-
  nOUS
   n[        R                  " U[        R                  " X½-
  [        R                   S	9U-  /5      nU
R#                  U5        M¯     [        R$                  " U
5      n
[        R&                  " U
SS2SS2S4   X[T45      n
U
R)                  X[T-  5      n
[        R                  " T5      SSSS24   n[        R&                  " UX[T45      R)                  X[T-  5      nU
U-   n
U
R+                  5       TS-
  :”  a  TS-
  XªTS-
  :„  '   [        R,                  " XšSS5        U	$ s  snf )a*  
Computes random mask spans for a given shape. Used to implement [SpecAugment: A Simple Data Augmentation Method for
ASR](https://arxiv.org/abs/1904.08779). Note that this method is not optimized to run on TPU and should be run on
CPU as part of the preprocessing during training.

Args:
    shape: The shape for which to compute masks. This should be of a tuple of size 2 where
           the first element is the batch size and the second element is the length of the axis to span.
    mask_prob:  The percentage of the whole axis (between 0 and 1) which will be masked. The number of
                independently generated mask spans of length `mask_length` is computed by
                `mask_prob*shape[1]/mask_length`. Note that due to overlaps, `mask_prob` is an upper bound and the
                actual percentage will be smaller.
    mask_length: size of the mask
    min_masks: minimum number of masked spans
    attention_mask: A (right-padded) attention mask which independently shortens the feature axis of
                    each batch dimension.
r    z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: Ú`c                 ó    >• [        TU -  T-  T-   5      n[        UT5      nUT-  T:”  a  TT-  nU TS-
  -
  U:  a  [        U TS-
  -
  S5      nU$ )z;Given input length, compute how many spans should be maskedr    r   )rÉ   Úmax)Úinput_lengthÚnum_masked_spanÚepsilonrª  r®  r¯  r’  s     €€€€€r8   Úcompute_num_masked_spanÚ6_compute_mask_indices.<locals>.compute_num_masked_spanß  so   ø€ ä˜i¨,Ñ6¸ÑDÀwÑNÓOˆÜ˜o¨yÓ9ˆð ˜[Ñ(¨?Ó:Ø-°Ñ<ˆOð ˜;¨™?Ñ+¨oÓ=Ü! ,°+À±/Ñ"BÀAÓFˆOàÐr:   NrR   rä   r   F)Úreplace)rf  ÚnpÚrandomÚrandÚitemÚdetachÚsumÚtolistrK  ÚzerosrÊ   Úchoicerk  ÚlenÚconcatenateÚonesÚint32ÚappendÚarrayÚbroadcast_tor\   r³  Úput_along_axis)rZ   r®  rª  rf   r¯  r”  r·  Ú_r)  Úspec_aug_maskÚspec_aug_mask_idxsÚmax_num_masked_spanr´  rµ  Úspec_aug_mask_idxÚdummy_mask_idxÚoffsetsr¶  r’  s    `` `            @@r8   Ú_compute_mask_indicesrÒ  ¹  sš  ü€ ð0 #(Ñ€JàQƒÜÐAÓBÐBà_Ó$ÜØ]Ð^iÐ]jØ& Ð&7°qð:ó
ð 	
ô i‰in‰n˜QÓ×$Ñ$Ó&€G÷ñ ð$ Ñ%ð 	×ÑÓ×#Ñ# BÓ'×.Ñ.Ô0ä',¨ZÔ'8Ó9Ò'8 !‹oÑ'8Ñ9ð ô —H’H˜j¨/Ð:Ä$ÑG€MØÐá1°/ÓBÐà˜aÓØÐã%ˆá1°,Ó?ˆô ŸI™I×,Ñ,ÜIŠIl k°A¡oÑ6Ó7¸ÐRWð -ð 
Ðô Ð Ó! QÓ&ð -¨qÑ0‰Nà.¨qÑ1ˆNäŸNšNØ¤§¢Ð(;Ñ(MÔUW×U]ÑU]Ñ ^ÐaoÑ oÐpó
Ðð 	×!Ñ!Ð"3Ö4ñ/ &ô2 ŸšÐ"4Ó5Ðô ŸšØš1ša ˜:Ñ&¨È+Ð(VóÐð ,×3Ñ3°JÐVaÑ@aÓbÐô iŠi˜Ó$ T¨4² ]Ñ3€GÜoŠo˜g¨
ÈÐ'UÓV×^Ñ^Ø¨+Ñ5ó€Gð ,¨gÑ5Ðð ×ÑÓ /°AÑ"5Ó5ØGVÐYZÑGZÐ°À!Ñ0CÑCÑDô ×Òm¸¸BÔ?àÐùòw :s   Â(I0c                   ó&  ^ • \ rS rSrS\4U 4S jjrS rS rS rS r	S r
 SS	\R                  S
\\R                     4S jjr\\            SS\\R                     S
\\R                     S\\R                     S\\R                     S\\\\R                           S\\\\\R                     4      S\\\R                        S\\\R                        S\\   S\\   S\\   S\\R                     S\4S jj5       5       rSrU =r$ )ÚMoonshineModeli0  r,   c                 ó„   >• [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r(   )r*   r+   r=  Úencoderru  ÚdecoderrP  ©r5   r,   r7   s     €r8   r+   ÚMoonshineModel.__init__2  s2   ø€ Ü‰Ñ˜Ô ä'¨Ó/ˆŒÜ'¨Ó/ˆŒà‰Õr:   c                 ó.   • U R                   R                  $ r(   ©r×  rz  rV  s    r8   rW  Ú#MoonshineModel.get_input_embeddings:  s   € Ø|‰|×(Ñ(Ð(r:   c                 ó$   • XR                   l        g r(   rÛ  rZ  s     r8   r[  Ú#MoonshineModel.set_input_embeddings=  s   € Ø$)‰Õ!r:   c                 ó   • U R                   $ r(   )rÖ  rV  s    r8   Úget_encoderÚMoonshineModel.get_encoder@  ó   € Ø|‰|Ðr:   c                 ó   • U R                   $ r(   )r×  rV  s    r8   Úget_decoderÚMoonshineModel.get_decoderC  râ  r:   c                 ó8   • U R                   R                  5         g)z“
Calling this function will disable the gradient computation for the Moonshine encoder so that its parameters will
not be updated during training.
N)rÖ  Ú_freeze_parametersrV  s    r8   Úfreeze_encoderÚMoonshineModel.freeze_encoderF  s   € ð
 	‰×'Ñ'Õ)r:   Úinput_featuresrf   c                 ó2  • [        U R                  SS5      (       d  U$ UR                  5       u  p4nU R                  R                  S:”  a¦  U R                  (       a•  [        X54U R                  R                  U R                  R                  UU R                  R                  S9n[        R                  " XaR                  [        R                  S9nUSS2S4   R                  SUS5      nSX'   U R                  R                  S:”  a‹  U R                  (       az  [        X44U R                  R                  U R                  R                  U R                  R                  S9n[        R                  " XqR                  [        R                  S9nSX'   U$ )	z‚
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://arxiv.org/abs/1904.08779).
Úapply_spec_augmentTr   )r®  rª  rf   r¯  )rÝ   rk   NrR   )r®  rª  r¯  )r    r,   ÚsizeÚmask_time_probrm   rÒ  Úmask_time_lengthÚmask_time_min_masksrF   ÚtensorrÝ   rÊ   r[   Úmask_feature_probÚmask_feature_lengthÚmask_feature_min_masks)r5   rê  rf   r”  r0   r’  Úmask_time_indicesÚmask_feature_indicess           r8   Ú_mask_input_featuresÚ#MoonshineModel._mask_input_featuresM  sN  € ô t—{‘{Ð$8¸$×?Ñ?Ø!Ð!ð 4B×3FÑ3FÓ3HÑ0ˆ
 à;‰;×%Ñ%¨Ó)¨d¯m¯mä 5ØÐ-ØŸ+™+×4Ñ4Ø ŸK™K×8Ñ8Ø-ØŸ+™+×9Ñ9ñ!Ðô !&§¢Ð->×G\ÑG\Ôdi×dnÑdnÑ oÐØ 1²!°T°'Ñ :× AÑ AÀ"ÀkÐSUÓ VÐØ01ˆNÑ-à;‰;×(Ñ(¨1Ó,°··ä#8ØÐ)ØŸ+™+×7Ñ7Ø ŸK™K×;Ñ;ØŸ+™+×<Ñ<ñ	$Ð ô $)§<¢<Ð0D×MbÑMbÔjo×jtÑjtÑ#uÐ Ø34ˆNÑ0àÐr:   r  Údecoder_input_idsÚdecoder_attention_maskÚencoder_outputsrƒ  Údecoder_inputs_embedsÚdecoder_position_idsrý   r³   r]  r®   r<   c                 ól  • U
b  U
OU R                   R                  n
Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	Uc  U R	                  UUU
US9nOK[        U[        5      (       d6  [        US   [        U5      S:”  a  US   OS[        U5      S:”  a  US   OSS9nU R                  UUUUR                  UUUU	U
UUS9n[        UR                  UR                  UR                  UR                  UR                  UR                  UR                  UR                  S9$ )	aÌ  
input_values (`torch.FloatTensor` of shape `(batch_size, audio_length)`):
    Float values of the raw speech waveform. Raw speech waveform can be
    obtained by loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a
    `numpy.ndarray`, *e.g.* via the soundfile library (`pip install soundfile`). To prepare the array into
    `input_values`, the [`AutoFeatureExtractor`] should be used for padding
    and conversion into a tensor of type `torch.FloatTensor`.
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
    it.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
decoder_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
    `past_key_values`).

    If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
    and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
    information on the default strategy.

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `decoder_input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.n_positions - 1]`.

    [What are position IDs?](../glossary#position-ids)

Example:

```python
>>> import torch
>>> from transformers import AutoFeatureExtractor, MoonshineModel
>>> from datasets import load_dataset

>>> model = MoonshineModel.from_pretrained("UsefulSensors/moonshine-tiny")
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("UsefulSensors/moonshine-tiny")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
>>> input_values = inputs.input_values
>>> decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
>>> last_hidden_state = model(input_values, decoder_input_ids=decoder_input_ids).last_hidden_state
>>> list(last_hidden_state.shape)
[1, 2, 288]
```
N)rf   r³   r]  r   r    rO   rc  )rv  rf   r  r  rƒ  r„  rŒ   rý   r³   r]  r®   )rd  rƒ  Údecoder_hidden_statesÚdecoder_attentionsr†  Úencoder_last_hidden_stater  Úencoder_attentions)r,   r³   r]  rý   rÖ  ræ   r   rÃ  r×  rd  r   rƒ  r;   re  r†  )r5   r  rf   rù  rú  rû  rƒ  rü  rý  rý   r³   r]  r®   Údecoder_outputss                 r8   r>   ÚMoonshineModel.forwardx  s^  € ð` 2CÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐà$8Ñ$DÑ È$Ï+É+×JjÑJjð 	ð "+Ñ!6‘I¸D¿K¹K×<QÑ<Qˆ	àÑ"Ø/3¯|©|ØØ-Ø"3Ø%9ð	 0<ð 0‰Oô ˜O¬_×=Ñ=Ü-Ø"1°!Ñ"4Ü47¸Ó4HÈ1Ó4L˜o¨aÒ0ÐRVÜ14°_Ó1EÈÓ1I˜?¨1Ò-ÈtñˆOð FJÇ\Á\Ø'Ø1Ø#1Ø"1×"CÑ"CØ+Ø/Ø-ØØ/Ø!5Ø)ð FRð F
ˆô "Ø-×?Ñ?Ø+×;Ñ;Ø"1×"?Ñ"?Ø.×9Ñ9Ø,×=Ñ=Ø&5×&GÑ&GØ"1×"?Ñ"?Ø.×9Ñ9ñ	
ð 		
r:   )r×  rÖ  r(   )NNNNNNNNNNNN)rB   rC   rD   rE   r!   r+   rW  r[  rà  rä  rè  rF   r  r   rË   r÷  r   r   r   r   r   rÊ   r   r>   rH   rI   rJ   s   @r8   rÔ  rÔ  0  sº  ø† ð˜÷ ò)ò*òòò*ð 6:ñ)à×)Ñ)ð)ð ! ×!1Ñ!1Ñ2õ)ðV Øð 59Ø59Ø8<Ø=AØEIØZ^ØDHØBFØ$(Ø,0Ø/3Ø59ñ{
à˜u×0Ñ0Ñ1ð{
ð ! ×!1Ñ!1Ñ2ð{
ð $ E×$4Ñ$4Ñ5ð	{
ð
 !)¨×)9Ñ)9Ñ :ð{
ð " %¨¨e×.?Ñ.?Ñ(@Ñ"AÑBð{
ð " %Ð(;¸UÀ5×CTÑCTÑ=UÐ(UÑ"VÑWð{
ð  (¨¨e×.?Ñ.?Ñ(@ÑAð{
ð ' u¨U×-=Ñ-=Ñ'>Ñ?ð{
ð ˜D‘>ð{
ð $ D™>ð{
ð ' t™nð{
ð ! ×!1Ñ!1Ñ2ð{
ð 
ô{
ó ó ö{
r:   rÔ  rv  rx  Údecoder_start_token_idc                 óÖ   • U R                  U R                  5      nU SS2SS24   R                  5       USS2SS24'   X#SS2S4'   Uc  [        S5      eUR	                  US:H  U5        U$ )z)
Shift input ids one token to the right.
NrR   r    r   z1self.model.config.pad_token_id has to be defined.iœÿÿÿ)Ú	new_zerosrZ   r¨  rf  Úmasked_fill_)rv  rx  r  Úshifted_input_idss       r8   Úshift_tokens_rightr
  ø  sz   € ð "×+Ñ+¨I¯O©OÓ<ÐØ(ª¨C¨R¨C¨Ñ0×6Ñ6Ó8Ð’a˜™eÑØ4’a˜dÑàÑÜÐLÓMÐMà×"Ñ"Ð#4¸Ñ#<¸lÔKàÐr:   zj
    The Moonshine Model with a language modeling head. Can be used for automatic speech recognition.
    )Úcustom_introc                   ó"  ^ • \ rS rSrS/rS\4U 4S jjrS rS rS r	S r
S	\R                  4S
 jr\\             SS\\R$                     S\\R&                     S\\R&                     S\\R&                     S\\\\R$                           S\\\\\R$                     4      S\\\R$                        S\\\R&                        S\\   S\\   S\\   S\\R&                     S\\R&                     S	\4S jj5       5       rSrU =r$ )Ú!MoonshineForConditionalGenerationi  zproj_out.weightr,   c                 óÂ   >• [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  SS9U l        U R                  5         g )NFr   )
r*   r+   rÔ  r  r.   r/   r0   ry  Úproj_outrP  rØ  s     €r8   r+   Ú*MoonshineForConditionalGeneration.__init__  sH   ø€ Ü‰Ñ˜Ô Ü# FÓ+ˆŒ
ÜŸ	š	 &×"4Ñ"4°f×6GÑ6GÈeÑTˆŒð 	‰Õr:   c                 ó6   • U R                   R                  5       $ r(   )r  rà  rV  s    r8   rà  Ú-MoonshineForConditionalGeneration.get_encoder  ó   € Øz‰z×%Ñ%Ó'Ð'r:   c                 ó6   • U R                   R                  5       $ r(   )r  rä  rV  s    r8   rä  Ú-MoonshineForConditionalGeneration.get_decoder  r  r:   c                 ó   • U R                   $ r(   ©r  rV  s    r8   Úget_output_embeddingsÚ7MoonshineForConditionalGeneration.get_output_embeddings  s   € Ø}‰}Ðr:   c                 ó   • Xl         g r(   r  )r5   Únew_embeddingss     r8   Úset_output_embeddingsÚ7MoonshineForConditionalGeneration.set_output_embeddings!  s   € Ø&r:   r<   c                 ó6   • U R                   R                  5       $ r(   )r  rW  rV  s    r8   rW  Ú6MoonshineForConditionalGeneration.get_input_embeddings$  s   € Øz‰z×.Ñ.Ó0Ð0r:   r  rf   rù  rú  rû  rƒ  rü  rý  rý   r³   r]  r®   Úlabelsc                 óæ  • Ub:  Uc7  Uc4  [        XÐR                  R                  U R                  R                  5      nU R	                  UUUUUUUUU	U
UUS9nU R                  UR                  5      nSnUb$  U R                  XýU R                  R                  S9n[        UUUR                  UR                  UR                  UR                  UR                  UR                  UR                   S9	$ )aw  
input_values (`torch.FloatTensor` of shape `(batch_size, audio_length)`):
    Float values of the raw speech waveform. Raw speech waveform can be
    obtained by loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a
    `numpy.ndarray`, *e.g.* via the soundfile library (`pip install soundfile`). To prepare the array into
    `input_values`, the [`AutoFeatureExtractor`] should be used for padding
    and conversion into a tensor of type `torch.FloatTensor`.
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
    it.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
decoder_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
    `past_key_values`).

    If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
    and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
    information on the default strategy.

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `decoder_input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.n_positions - 1]`.

    [What are position IDs?](../glossary#position-ids)
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the language modeling loss. Indices should either be in `[0, ..., config.vocab_size]`
    or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored (masked), the loss is
    only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> import torch
>>> from transformers import AutoProcessor, MoonshineForConditionalGeneration
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("UsefulSensors/moonshine-tiny")
>>> model = MoonshineForConditionalGeneration.from_pretrained("UsefulSensors/moonshine-tiny")

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

>>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")
>>> input_values = inputs.input_values

>>> generated_ids = model.generate(input_values, max_new_tokens=100)

>>> transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
>>> transcription
'Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.'
```N)rf   rù  rû  rú  rƒ  rü  rý  rý   r³   r]  r®   )Úlogitsr   ry  )	Úlossr"  rƒ  rÿ  r   r†  r  r  r  )r
  r,   rx  r  r  r  rd  Úloss_functionry  r   rƒ  rÿ  r   r†  r  r  r  )r5   r  rf   rù  rú  rû  rƒ  rü  rý  rý   r³   r]  r®   r   r  r"  r#  s                    r8   r>   Ú)MoonshineForConditionalGeneration.forward'  s  € ðr ÑØ Ñ(Ð-BÑ-JÜ$6ØŸK™K×4Ñ4°d·k±k×6XÑ6Xó%Ð!ð '+§j¡jØØ)Ø/Ø+Ø#9Ø+Ø"7Ø!5ØØ/Ø!5Ø)ð '1ð '
ˆð —‘˜w×8Ñ8Ó9ˆàˆØÑØ×%Ñ%¨VÈtÏ{É{×OeÑOeÐ%ÐfˆDäØØØ#×3Ñ3Ø")×"?Ñ"?Ø&×9Ñ9Ø$×5Ñ5Ø&-×&GÑ&GØ")×"?Ñ"?Ø&×9Ñ9ñ

ð 
	
r:   )r  r  )NNNNNNNNNNNNN)rB   rC   rD   rE   Ú_tied_weights_keysr!   r+   rà  rä  r  r  r.   rs  rW  r   r   r   rF   r  rË   r   r   r   rÊ   r   r>   rH   rI   rJ   s   @r8   r  r    s»  ø† ð ,Ð,Ðð˜÷ ò(ò(òò'ð1 b§i¡iô 1ð Øð 59Ø59Ø8<Ø=AØEIØZ^ØDHØBFØ$(Ø,0Ø/3Ø59Ø-1ñ{
à˜u×0Ñ0Ñ1ð{
ð ! ×!1Ñ!1Ñ2ð{
ð $ E×$4Ñ$4Ñ5ð	{
ð
 !)¨×)9Ñ)9Ñ :ð{
ð " %¨¨e×.?Ñ.?Ñ(@Ñ"AÑBð{
ð " %Ð(;¸UÀ5×CTÑCTÑ=UÐ(UÑ"VÑWð{
ð  (¨¨e×.?Ñ.?Ñ(@ÑAð{
ð ' u¨U×-=Ñ-=Ñ'>Ñ?ð{
ð ˜D‘>ð{
ð $ D™>ð{
ð ' t™nð{
ð ! ×!1Ñ!1Ñ2ð{
ð ˜×)Ñ)Ñ*ð{
ð 
ô{
ó ó ö{
r:   r  )rÔ  r  r  )r´   )Nr    )Nr   )NÚtypingr   r   r   r   Únumpyrº  rF   Útorch.nnr.   Úactivationsr   Úcache_utilsr	   r
   r   Ú
generationr   Úmodeling_attn_mask_utilsr   r   r   Úmodeling_flash_attention_utilsr   Úmodeling_layersr   Úmodeling_outputsr   r   r   r   r   Úmodeling_rope_utilsr   r   Úmodeling_utilsr   r   Úprocessing_utilsr   Úutilsr   r   r   r   Úconfiguration_moonshiner!   Ú!torch.nn.attention.flex_attentionr"   Úintegrations.flex_attentionr#   Ú
get_loggerrB   r½   rs  r%   rL   rG   rÉ   ra   rå   r|   rƒ   r•   r—   rÍ   rð   r  r  r=  ru  rË   ÚndarrayrÒ  rÔ  r
  r  Ú__all__r   r:   r8   Ú<module>r;     s’  ð÷* 4Ó 3ã Û Ý å !ß CÑ CÝ )÷ñ õ
 CÝ 9÷õ ÷ Lß FÝ &ß \Ó \Ý 4ñ  ×!Ñ!Ý;åJð 
×	Ò	˜HÓ	%€ô˜"Ÿ)™)ô ô˜"Ÿ)™)ô ð 	U˜UŸ\™\ð 	U°#ð 	U¸%¿,¹,ô 	Uð& ñ%ØI‰Ið%à<‰<ð%ð 
‰ð%ð <‰<ð	%ð
 ˜UŸ\™\Ñ*ð%ð ð%ð õ%ò46ô'ôTC)˜Ÿ™ô C)ôL<˜rŸy™yô <ôD8Ð6ô 8ôvUÐ6ô Uðp ô"#˜ó "#ó ð"#ôJH
Ð/ô H
ðV ôWÐ/ó Wó ðWð| 26ØñtØc‰?ðtàðtð ðtð ˜U×-Ñ-Ñ.ð	tð
 ðtð ‡ZZõtðn ôD
Ð-ó D
ó ðD
ðN %§,¡,ð ¸cð Ð[^ô ñ  ðñô
W
Ð(@À/ó W
óð
W
òt ^r:   