o
    rZh{L                     @   s  d dl Z d dl mZ d dlmZ d dlZd dlmZmZm	Z	m
Z
mZmZmZmZ d dlmZ ejdddd	 Zejddd
d Zejdddd Zejdddd Zejdd Zejdddgdfddgd fdddgdd Zdd Zdd Zd d! Zd"d# Zejd$d% Zejdd&ddgdfd'dd(gd)d* Zejd+d, Z ejdddgd-fd.d/d0gd1d2 Z!d3d4 Z"ejd5d6 Z#ejdddgd-fddgd7fdd8dgd-fdd8dgd7fgd9d: Z$ejd;d< Z%ejdddgd=fd>d?d@gdAdB Z&dCdD Z'dEdF Z(ejdGdH Z)ejddIdJd(dd8gdKfddLd8gdMfddNd8gdKfgdOdP Z*ejdQdR Z+ejddSdJddd8gdTfddLd8gdUfddNd8gdTfgdVdW Z,ejdXdY Z-ejddIdJd(dd8gdZfddLd8gd[fddNd8gdZfgd\d] Z.ejd^d_ Z/ejddIdJd(dd8gd`fddLd8gdafddNd8gdbfgdcdd Z0ejdddedf Z1ejdgdd%d,d<dHdYdfej2d_ejj3dhdidjgejjdkg dled dmdndo Z4dpdq Z5drds Z6dtdu Z7dvdw Z8dxdy Z9dzd{ Z:d|d} Z;dS )~    N)fsum)
itemgetter)MLEAbsoluteDiscountingInterpolatedKneserNeyInterpolatedLaplaceLidstoneStupidBackoff
VocabularyWittenBellInterpolated)padded_everygramssession)scopec                   C   s   t g dddS )N)abcdz<s></s>   )Z
unk_cutoff)r
    r   r   L/var/www/auris/lib/python3.10/site-packages/nltk/test/unit/lm/test_models.py
vocabulary      r   c                   C   s   g dg dgS )N)r   r   r   r   )egr   r   r   r   r   r   r   r   r   training_data   r   r   c                 C      dd | D S )Nc                 S      g | ]	}t td |qS )   listr   .0sentr   r   r   
<listcomp>&       z(bigram_training_data.<locals>.<listcomp>r   r   r   r   r   bigram_training_data$      r)   c                 C   r   )Nc                 S   r   )   r!   r#   r   r   r   r&   +   r'   z)trigram_training_data.<locals>.<listcomp>r   r(   r   r   r   trigram_training_data)   r*   r,   c                 C   s   t d| d}|| |S Nr    r   r   fit)r   r)   modelr   r   r   mle_bigram_model.      
r2   zword, context, expected_scorer   r   r   r   )r   Nr   )r   Ng$I$I?)yNg۶m۶m?c                 C       t | ||d|ksJ d S N-C6?pytestapproxscore)r2   wordcontextexpected_scorer   r   r   test_mle_bigram_scores5   s    r?   c                 C   s   t | ddgsJ d S )Nr   r   )mathisinfZlogscore)r2   r   r   r   'test_mle_bigram_logscore_for_zero_scoreH   s   rB   c                 C   H   g d}d}d}t | |d|ksJ t | |d|ks"J d S )N)r   r   )r   r   )r   <UNK>)rE   r   )r   r   r   r   g(\?g_vO@r7   r9   r:   entropy
perplexity)r2   ZtrainedHrI   r   r   r   'test_mle_bigram_entropy_perplexity_seenL   s
   rK   c                 C   s4   g d}t | |sJ t | |sJ d S )N)rD   r   r   )r   r   rF   )r@   rA   rH   rI   )r2   Z	untrainedr   r   r   )test_mle_bigram_entropy_perplexity_unseene   s   rM   c                 C   sH   d}d}g d}t | |d|ksJ t | |d|ks"J d S )Ng~jt@gs @)r   r   r   )-r   rP   )r   r7   rG   )r2   rJ   rI   textr   r   r   +test_mle_bigram_entropy_perplexity_unigramsm   s
   rT   c                 C      t d|d}||  |S Nr+   orderr   r/   r,   r   r1   r   r   r   mle_trigram_model   r3   rZ   )r   )r   r   r   )r   Ngqq?)r4   NUUUUUU?c                 C   r5   r6   r8   )rZ   r<   r=   r>   r   r   r   test_mle_trigram_scores   s    r\   c                 C      t dd|d}||  |S )N皙?r    rW   r   r0   r)   r   r1   r   r   r   lidstone_bigram_model      
ra   g88?)r   Ng"u)?)r   Ngк{?)r4   NgL?c                 C   r5   r6   r8   )ra   r<   r=   r>   r   r   r   test_lidstone_bigram_score   s   rc   c                 C   rC   )NrD   rL   )r   rE   )rE   r   )r   r   )r   r   g=,Ԛ]@g鷯1@r7   rG   )ra   rS   rJ   rI   r   r   r    test_lidstone_entropy_perplexity   
   re   c                 C   r]   )Nr^   r+   rW   r_   rY   r   r   r   lidstone_trigram_model   rb   rg   gqq?r   c                 C   r5   r6   r8   )rg   r<   r=   r>   r   r   r   test_lidstone_trigram_score   s   rh   c                 C   rU   r-   )r   r0   r`   r   r   r   laplace_bigram_model   r3   ri   gqq?)r   NgtE]t?)r   NgF]tE?)r4   NgF]tE?c                 C   r5   r6   r8   )ri   r<   r=   r>   r   r   r   test_laplace_bigram_score   s    rj   c                 C   rC   )Nrd   gQ	@gݓz!@r7   rG   )ri   rS   rJ   rI   r   r   r   &test_laplace_bigram_entropy_perplexity  rf   rk   c                 C   s   | j dksJ d S )Nr   )gamma)ri   r   r   r   test_laplace_gamma5  s   rm   c                 C   rU   )Nr+   r.   )r   r0   rY   r   r   r   wittenbell_trigram_model9  r3   rn   )r   Ngqq?)r   Ng        grq?r   gqq?r   c                 C   r5   r6   r8   )rn   r<   r=   r>   r   r   r   test_wittenbell_trigram_score@  s   "ro   c                 C   r]   )Nr+   g      ?)rX   Zdiscountr   r   r0   rY   r   r   r   kneserney_trigram_modelr  rb   rq   )r   Ng$I$I?gm۶m?g$I$I?c                 C   r5   r6   r8   )rq   r<   r=   r>   r   r   r   test_kneserney_trigram_scorey  s   (rr   c                 C   rU   rV   )r   r0   rY   r   r   r   "absolute_discounting_trigram_model  r3   rs   r[   g      ?c                 C   r5   r6   r8   )rs   r<   r=   r>   r   r   r   'test_absolute_discounting_trigram_score  s   %rt   c                 C   rU   rV   )r	   r0   rY   r   r   r   stupid_backoff_trigram_model  r3   ru   g      ?      ?g?c                 C   r5   r6   r8   )ru   r<   r=   r>   r   r   r   !test_stupid_backoff_trigram_score  s   rw   c                 C   rU   )Nr    rW   rp   r`   r   r   r   kneserney_bigram_model  r3   rx   model_fixturez*Stupid Backoff is not a valid distribution)reason)Zmarksr=   )	rO   rP   rN   )r   rE   rR   )r   )r)w)Zidsc                    s<   | | t fddjD }t|ddksJ d S )Nc                 3   s    | ]	} | V  qd S N)r;   )r$   r}   r=   r1   r   r   	<genexpr>!  s    z!test_sums_to_1.<locals>.<genexpr>gHz>rv   )ZgetfixturevaluesumZvocabr9   r:   )ry   r=   requestZscores_for_contextr   r   r   test_sums_to_1  s   
r   c                 C   s   | j dddks
J d S )Nr+   random_seedrE   generaterZ   r   r   r   test_generate_one_no_context*  s   r   c                 C   sJ   | j dgddksJ | j ddgddksJ | j ddgddks#J d S )Nr   	text_seedr   r   r   r   r   r   r   r   'test_generate_one_from_limiting_context.  s   r   c                 C   s   | j ddddksJ d S )N)r   r   r    r   r   r   r   r   r   r   r   %test_generate_one_from_varied_context5  s   r   c                 C   s<   t | jtdg}| | | jddddg dksJ d S )NZbdbdbd   )r   r      r   )r   r   r   r   r   r   r   )r   rX   r"   r0   r   )rZ   Zmore_training_textr   r   r   test_generate_cycle:  s   
 r   c                 C   s    | j ddddg dksJ d S )Nr   )r   r   r+   r   )rE   r   r   r   rE   r   r   r   r   r   test_generate_with_text_seedK  s    r   c                 C   s$   | j ddd| j dddksJ d S )N)Zaliensr+   r   r{   r   r   r   r   r   test_generate_oov_text_seedU  s
   r   c                 C   sX   t t | jdd W d    n1 sw   Y  | jd dd| jddks*J d S )Nr~   r   r+   r   r   )r9   Zraises	TypeErrorr   r   r   r   r   test_generate_None_text_seed[  s   
r   )<r@   r   r   operatorr   r9   Znltk.lmr   r   r   r   r   r	   r
   r   Znltk.lm.preprocessingr   Zfixturer   r   r)   r,   r2   markZparametrizer?   rB   rK   rM   rT   rZ   r\   ra   rc   re   rg   rh   ri   rj   rk   rm   rn   ro   rq   rr   rs   rt   ru   rw   rx   paramZxfailr   r   r   r   r   r   r   r   r   r   r   r   <module>   sD  (






























	
$	


!	





