
    /h                      j     S SK r S SKJr   " S S\5      rS r\S:X  a  \" 5         gg! \ a     N+f = f)    N)VectorSpaceClustererc                   \    \ rS rSrSr      SS jrS rSS jrS rS r	S	 r
S
 rS rSrg)EMClusterer   a  
The Gaussian EM clusterer models the vectors as being produced by
a mixture of k Gaussian sources. The parameters of these sources
(prior probability, mean and covariance matrix) are then found to
maximise the likelihood of the given data. This is done with the
expectation maximisation algorithm. It starts with k arbitrarily
chosen means, priors and covariance matrices. It then calculates
the membership probabilities for each vector in each of the
clusters; this is the 'E' step. The cluster parameters are then
updated in the 'M' step using the maximum likelihood estimate from
the cluster membership probabilities. This process continues until
the likelihood of the data does not significantly increase.
Nc                     [         R                  " XU5        [        R                  " U[        R                  5      U l        [        U5      U l        X@l        X0l	        X l
        XPl        g)a  
Creates an EM clusterer with the given starting parameters,
convergence threshold and vector mangling parameters.

:param  initial_means: the means of the gaussian cluster centers
:type   initial_means: [seq of] numpy array or seq of SparseArray
:param  priors: the prior probability for each cluster
:type   priors: numpy array or seq of float
:param  covariance_matrices: the covariance matrix for each cluster
:type   covariance_matrices: [seq of] numpy array
:param  conv_threshold: maximum change in likelihood before deemed
            convergent
:type   conv_threshold: int or float
:param  bias: variance bias used to ensure non-singular covariance
              matrices
:type   bias: float
:param  normalise:  should vectors be normalised to length 1
:type   normalise:  boolean
:param  svd_dimensions: number of dimensions to use in reducing vector
                       dimensionsionality with SVD
:type   svd_dimensions: int
N)r   __init__numpyarrayfloat64_meanslen_num_clusters_conv_threshold_covariance_matrices_priors_bias)selfinitial_meanspriorscovariance_matricesconv_thresholdbias	normalisesvd_dimensionss           G/var/www/auris/envauris/lib/python3.13/site-packages/nltk/cluster/em.pyr   EMClusterer.__init__   sN    @ 	%%d~Fkk-? /-$7!
    c                     U R                   $ N)r   r   s    r   num_clustersEMClusterer.num_clustersG   s    !!!r   c           
      `   [        U5      S:  d   e[        US   5      nU R                  nU R                  nU(       dB  [        R                  " U R
                  [        R                  5      U R
                  -  =oPl        U R                  nU(       dO  [        U R
                  5       Vs/ s H(  n[        R                  " U[        R                  5      PM*     sn=o`l        U R                  XXF5      nSn	U	(       Gd+  U(       a  [        SU5        [        R                  " [        U5      U R
                  4[        R                  5      n
[        [        U5      5       H`  n[        U R
                  5       H%  nX[   U R                  XK   Xk   X   5      -  XU4'   M'     XS S 24==   [        XS S 24   5      -  ss'   Mb     [        U R
                  5       GH  nXk   n[        R                  " X34[        R                  5      n[        R                  " U[        R                  5      nSn[        [        U5      5       HJ  nX   XK   -
  nXX{4   [        R                  R!                  UU5      -  -  nXX{4   -  nXX{4   X   -  -  nML     X-  Xk'   X-  XK'   U[        U5      -  X['   Xk==   U R"                  [        R                  " U[        R                  5      -  -  ss'   GM     U R                  XXF5      n[%        UU-
  5      U R&                  :  a  Sn	UnU	(       d  GM*  g g s  snf )Nr   Fziteration; loglikelihood        T)r   r   r   r	   onesr   r   r   rangeidentity_loglikelihoodprintzeros	_gaussiansummultiplyouterr   absr   )r   vectorstrace
dimensionsmeansr   covariancesilastl	convergedhjcovariance_beforenew_covariancenew_meansum_hjdeltals                     r   cluster_vectorspaceEMClusterer.cluster_vectorspaceJ   s   7|a _


4--u}}=@R@RRF\ // t11272A z5==927 K3 ##GUH	0%8S\4+=+=>NA3w<(t112A$i$..+.'*+ AdG 3 Q$3qAw<' ) 4--.$/N!!&j-Eu}}!U ;;z5==As7|,A#J1E"g0D0DUE0R&RRNg%F!$'* 44H	 -
 "0!8#,"S\1	 $**u~~j%--/X"XX /$ ##GUHA 519~ 4 44 	EI )7s   +/L+c                     S n[        U R                  5       HT  nU R                  U   U R                  U R                  U   U R
                  U   U5      -  nU(       a
  XBS   :  d  MQ  XC4nMV     US   $ )Nr      )r&   r   r   r+   r   r   )r   vectorbestr9   ps        r   classify_vectorspace EMClusterer.classify_vectorspace   sr    t))*AQ$..A 9 9! <f# A 1Aw;v + Awr   c                     U R                  5       R                  U5      nU R                  U   U R                  U R                  U   U R
                  U   U5      -  $ r   )cluster_namesindexr   r+   r   r   )r   rD   clustercids       r   likelihood_vectorspace"EMClusterer.likelihood_vectorspace   sW      "((1||G$t~~KK $";";G"Df(
 
 	
r   c                    [        U5      nUR                  XD4:X  d   S[        UR                  5      -  5       e [        R                  R                  U5      n[        R                  R                  U5      nUS-  S[        R                  -  U* S-  -  -  nX1-
  n[        X5        S[        R                  " [        R                  " X5      U5      -  n	U[        R                  " U	5      -  $ ! [         a     gf = f)Nzbad sized covariance matrix, %sg         g       @r   )r   shapestrr	   linalgdetinvpir)   dotexpOverflowError)
r   meancvmxmrU   rV   adxbs
             r   r+   EMClusterer._gaussian   s    IyyQF"V$ECII$VV"	,,""3'C,,""3'CT	Q\rCx88AB"Nuyy2!3R88Auyy|## 	 	s   B3C/ /
C<;C<c           	          SnU HX  nSn[        [        U5      5       H"  nXrU   U R                  X8   XH   U5      -  -  nM$     U[        R                  " U5      -  nMZ     U$ )Nr$   r   )r&   r   r+   r	   log)	r   r0   r   r3   r4   llhrD   rF   r9   s	            r   r(   EMClusterer._loglikelihood   sd    FA3v;'AY+.&!QQQ (599Q<C	 
 
r   c                 2    S[        U R                  5      -  $ )Nz<EMClusterer means=%s>)listr   r    s    r   __repr__EMClusterer.__repr__   s    '$t{{*;;;r   )r   r   r   r   r   r   )NNgư>皙?FN)F)__name__
__module____qualname____firstlineno____doc__r   r!   r@   rG   rN   r+   r(   ri   __static_attributes__ r   r   r   r      sE    "  &P":x
 <r   r   c                  V   SSK Jn   SS/SS/SS/4 Vs/ s H  n[        R                  " U5      PM     nnSS/SS	//nU R	                  US
S9nUR                  USSS9n[        SU5        [        SU5        [        5         [        S5       Hd  n[        SU5        [        SUR                  U   5        [        SUR                  U   5        [        SUR                  U   5        [        5         Mf     [        R                  " SS/5      n[        SU-  SS9  [        UR                  U5      5        [        R                  " SS/5      n[        SU-  5        UR                  U5      nUR                  5        H'  n	[        U	 SUR                  U	5      S-  S S35        M)     gs  snf )zG
Non-interactive demonstration of the clusterers with simple 2-D data.
r   )rL   g      ?g      ?rC         rQ   gGz @rk   )r   T)r1   z
Clustered:z
As:       zCluster:zPrior:  zMean:   zCovar:  zclassify(%s): )endzclassification_probdist(%s):z => d   z.0f%N)nltkrL   r	   r
   r   r)   r&   r   r   r   classifyclassification_probdistsamplesprob)
rL   fr0   r3   	clustererclusterscrD   pdistsamples
             r   demor      s   
  *-c
S#JA'GH'G!u{{1~'GGHVaYE##E#4I  $d ;H	, 	,!	G1Xj!j)++A./j)**1-.j)88;<  [[!Q F	/F
",	)

V
$% [[!Q F	
(6
12--f5E--/UZZ/#5c:!<= "5 Is    F&__main__)r	   ImportErrornltk.cluster.utilr   r   r   rl   rr   r   r   <module>r      sQ   	 3`<& `<F$>N zF a  		s   ) 22