
    /h{L                     <   S SK r S SK Jr  S SKJr  S SKrS SKJrJrJ	r	J
r
JrJrJrJr  S SKJr  \R"                  " SS9S 5       r\R"                  " SS9S	 5       r\R"                  " SS9S
 5       r\R"                  " SS9S 5       r\R"                  S 5       r\R.                  R1                  SSS/S4SS/S 4SSS/5      S 5       rS rS rS rS r\R"                  S 5       r\R.                  R1                  SSSS/S4SSS/5      S 5       r\R"                  S 5       r \R.                  R1                  SSS/S 4S!S"S#/5      S$ 5       r!S% r"\R"                  S& 5       r#\R.                  R1                  SSS/S 4SS/S'4SS(S/S 4SS(S/S'4/5      S) 5       r$\R"                  S* 5       r%\R.                  R1                  SSS/S+4S,S-S./5      S/ 5       r&S0 r'S1 r(\R"                  S2 5       r)\R.                  R1                  SS3S4SSS(/S54SS6S(/S74SS8S(/S54/5      S9 5       r*\R"                  S: 5       r+\R.                  R1                  SS;S4SSS(/S<4SS6S(/S=4SS8S(/S<4/5      S> 5       r,\R"                  S? 5       r-\R.                  R1                  SS3S4SSS(/S@4SS6S(/SA4SS8S(/S@4/5      SB 5       r.\R"                  SC 5       r/\R.                  R1                  SS3S4SSS(/SD4SS6S(/SE4SS8S(/SF4/5      SG 5       r0\R"                  " SS9SH 5       r1\R.                  R1                  SISJSKSLSMSNSOSP\Rd                  " SQ\R.                  Rg                  SRSS9ST9/5      \R.                  R1                  SU/ SVQ\" S 5      SW9SX 5       5       r4SY r5SZ r6S[ r7S\ r8S] r9S^ r:S_ r;g)`    N)fsum)
itemgetter)MLEAbsoluteDiscountingInterpolatedKneserNeyInterpolatedLaplaceLidstoneStupidBackoff
VocabularyWittenBellInterpolated)padded_everygramssession)scopec                      [        / SQSS9$ )N)abcdz<s></s>   )
unk_cutoff)r        U/var/www/auris/envauris/lib/python3.13/site-packages/nltk/test/unit/lm/test_models.py
vocabularyr      s    >1MMr   c                      / SQ/ SQ/$ )N)r   r   r   r   )egr   r   r   r   r   r   r   r   training_datar!      s     "@AAr   c           	      X    U  Vs/ s H  n[        [        SU5      5      PM     sn$ s  snf )N   listr   r!   sents     r   bigram_training_datar(   $   '    9FGD"1d+,GGG   'c           	      X    U  Vs/ s H  n[        [        SU5      5      PM     sn$ s  snf )N   r$   r&   s     r   trigram_training_datar-   )   r)   r*   c                 <    [        SU S9nUR                  U5        U$ Nr#   r   r   fit)r   r(   models      r   mle_bigram_modelr4   .   s     j)E	II"#Lr   zword, context, expected_scorer   r   r   r   )r   Nr   )r   Ng$I$I?)yNg۶m۶m?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg N-C6?pytestapproxscore)r4   wordcontextexpected_scores       r   test_mle_bigram_scoresr@   5   s)      ==)//>EWWWr   c                 `    [         R                  " U R                  SS/5      5      (       d   eg )Nr   r   )mathisinflogscore)r4   s    r   'test_mle_bigram_logscore_for_zero_scorerE   H   s'    ::&//cU;<<<<r   c                     / SQnSnSn[         R                  " U R                  U5      S5      U:X  d   e[         R                  " U R                  U5      S5      U:X  d   eg )N)r   r   )r   r   )r   <UNK>)rH   r   )r   r   r   r   g(\?g_vO@r8   r:   r;   entropy
perplexity)r4   trainedHrL   s       r   'test_mle_bigram_entropy_perplexity_seenrO   L   s_    G" 	AJ==)11':DAQFFF==)44W=tD
RRRr   c                     / SQn[         R                  " U R                  U5      5      (       d   e[         R                  " U R                  U5      5      (       d   eg )N)rG   r   r   )r   r   rI   )rB   rC   rK   rL   )r4   	untraineds     r   )test_mle_bigram_entropy_perplexity_unseenrS   e   sI    EI::&..y9::::::&11)<====r   c                     SnSn/ SQn[         R                  " U R                  U5      S5      U:X  d   e[         R                  " U R                  U5      S5      U:X  d   eg )Ng~jt@gs @)r   r   r   )-r   rW   )r   r8   rJ   )r4   rN   rL   texts       r   +test_mle_bigram_entropy_perplexity_unigramsr[   m   s]     	AJHD==)11$7>!CCC==)44T:DAZOOOr   c                 <    [        SUS9nUR                  U 5        U$ Nr,   orderr   r1   r-   r   r3   s      r   mle_trigram_modelra      s     aJ/E	II#$Lr   )r   )r   r   r   )r   Ngqq?)r5   NUUUUUU?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )ra   r=   r>   r?   s       r   test_mle_trigram_scoresrd      s)    $ ==*00?F.XXXr   c                 >    [        SSUS9nUR                  U 5        U$ )N皙?r#   r^   r	   r2   r(   r   r3   s      r   lidstone_bigram_modelri      s"    Sj9E	II"#Lr   g88?)r   Ng"u)?)r   Ngк{?)r5   NgL?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )ri   r=   r>   r?   s       r   test_lidstone_bigram_scorerk      s0    4 	+11$@$G		r   c                     / SQnSnSn[         R                  " U R                  U5      S5      U:X  d   e[         R                  " U R                  U5      S5      U:X  d   eg )NrG   rQ   )r   rH   )rH   r   )r   r   )r   r   g=,Ԛ]@g鷯1@r8   rJ   )ri   rZ   rN   rL   s       r    test_lidstone_entropy_perplexityrn      s_    D$ 	AJ==.66t<dCqHHH==.99$?F*TTTr   c                 >    [        SSUS9nUR                  U 5        U$ )Nrf   r,   r^   rg   r`   s      r   lidstone_trigram_modelrp      s"    Sj9E	II#$Lr   gqq?r   c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )rp   r=   r>   r?   s       r   test_lidstone_trigram_scorerr      s0     	,224A4H		r   c                 <    [        SUS9nUR                  U 5        U$ r/   )r   r2   rh   s      r   laplace_bigram_modelrt      s     A*-E	II"#Lr   gqq?)r   NgtE]t?)r   NgF]tE?)r5   NgF]tE?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )rt   r=   r>   r?   s       r   test_laplace_bigram_scorerv      s+    6 	*00?F.XXr   c                     / SQnSnSn[         R                  " U R                  U5      S5      U:X  d   e[         R                  " U R                  U5      S5      U:X  d   eg )Nrm   gQ	@gݓz!@r8   rJ   )rt   rZ   rN   rL   s       r   &test_laplace_bigram_entropy_perplexityrx     s_    D$ 	AJ==-55d;TBaGGG==-88>ESSSr   c                 (    U R                   S:X  d   eg )Nr   )gamma)rt   s    r   test_laplace_gammar{   5  s    %%***r   c                 <    [        SUS9nUR                  U 5        U$ )Nr,   r0   )r   r2   r`   s      r   wittenbell_trigram_modelr}   9  s     "1<E	II#$Lr   )r   Ngqq?)r   Ng        grq?r   gqq?r   c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )r}   r=   r>   r?   s       r   test_wittenbell_trigram_scorer   @  s1    D 	.44TCTJ		r   c                 >    [        SSUS9nUR                  U 5        U$ )Nr,   g      ?)r_   discountr   r   r2   r`   s      r   kneserney_trigram_modelr   r  s"    !DZPE	II#$Lr   )r   Ng$I$I?gm۶m?g$I$I?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )r   r=   r>   r?   s       r   test_kneserney_trigram_scorer   y  s1    P 	-33DBDI		r   c                 <    [        SUS9nUR                  U 5        U$ r]   )r   r2   r`   s      r   "absolute_discounting_trigram_modelr     s     +!
KE	II#$Lr   rb   g      ?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )r   r=   r>   r?   s       r   'test_absolute_discounting_trigram_scorer     s1    J 	8>>tMtT		r   c                 <    [        SUS9nUR                  U 5        U$ r]   )r
   r2   r`   s      r   stupid_backoff_trigram_modelr     s     j9E	II#$Lr   g      ?      ?g?c                 \    [         R                  " U R                  X5      S5      U:X  d   eg r7   r9   )r   r=   r>   r?   s       r   !test_stupid_backoff_trigram_scorer     s0    2 	288GN		r   c                 <    [        SUS9nUR                  U 5        U$ )Nr#   r^   r   rh   s      r   kneserney_bigram_modelr     s     !jAE	II"#Lr   model_fixturer4   ra   ri   rt   r}   r   r   r   z*Stupid Backoff is not a valid distribution)reason)marksr>   )	rV   rW   rU   )r   rH   rY   )r   )r)w)idsc                    ^^ UR                  U 5      m[        UU4S jTR                   5       5      n[        R                  " US5      S:X  d   eg )Nc              3   H   >#    U  H  nTR                  UT5      v   M     g 7fN)r<   ).0r   r>   r3   s     r   	<genexpr>!test_sums_to_1.<locals>.<genexpr>!  s     JkU[[G44ks   "gHz>r   )getfixturevaluesumvocabr:   r;   )r   r>   requestscores_for_contextr3   s    `  @r   test_sums_to_1r     sD    0 ##M2EJekkJJ==+T2c999r   c                 .    U R                  SS9S:X  d   eg )Nr,   random_seedrH   generatera   s    r   test_generate_one_no_contextr   *  s    %%!%4???r   c                     U R                  S/S9S:X  d   eU R                  SS/S9S:X  d   eU R                  SS/S9S:X  d   eg )Nr   	text_seedr   r   r   r   r   s    r   'test_generate_one_from_limiting_contextr   .  s`    %%%6#===%%c
%;sBBB%%c
%;sBBBr   c                 0    U R                  SSS9S:X  d   eg )N)r   r   r#   r   r   r   r   r   s    r   %test_generate_one_from_varied_contextr   5  s!    %%!%LPSSSSr   c                     [        U R                  [        S5      5      /nU R                  U5        U R	                  SSSS9/ SQ:X  d   eg )Nbdbdbd   )r   r      r   )r   r   r   r   r   r   r   )r   r_   r%   r2   r   )ra   more_training_texts     r   test_generate_cycler   :  sV    +,=,C,CT(^TU,-%%a:1%M R   r   c                 6    U R                  SSSS9/ SQ:X  d   eg )Nr   )r   r   r,   r   )rH   r   r   r   rH   r   r   s    r   test_generate_with_text_seedr   K  s,    %%a<Q%O T   r   c                 L    U R                  SSS9U R                  SSS9:X  d   eg )N)aliensr,   r   r   r   r   s    r   test_generate_oov_text_seedr   U  s?    %%1 & 		#	#ja	#	HI I Ir   c                     [         R                  " [        5         U R                  SS9  S S S 5        U R                  S SS9U R                  SS9:X  d   eg ! , (       d  f       N2= f)Nr   r   r,   r   r   )r:   raises	TypeErrorr   r   s    r   test_generate_None_text_seedr   [  sg    	y	!""W"5 
" %%A & 		#	#	#	23 3 3	 
"	!s   A
A%)<rB   r   r   operatorr   r:   nltk.lmr   r   r   r   r	   r
   r   r   nltk.lm.preprocessingr   fixturer   r!   r(   r-   r4   markparametrizer@   rE   rO   rS   r[   ra   rd   ri   rk   rn   rp   rr   rt   rv   rx   r{   r}   r   r   r   r   r   r   r   r   paramxfailr   r   r   r   r   r   r   r   r   r   r   <module>r      s      	 	 	 4 i N !N i B !B i H !H i H !H   #	seQ	seQ 	XX=S2>P(   # 	 
seQ"Y#"Y   # 
seY 	  	  	 '010U2   # 
seY	seY	sCj)$	sCj)$   # 
seW 	 	 	)232T2+   # 	 	 	 
se34
 
sCjLM 
sCj9;3<=<(   # 	 	 	 
se-/ 
sCjAB 
sCj24? #HI#H   # 	 	 	 
se79 
sCjKL 
sCj<>9 BC B   # 	 	 		seU	sCj%  
sCj=*!*+* i  ! ", *++##C $ 	
$ R1  
:%.:@CT
"I3r   