
    /h?l                     <    S r SrSSKrSSKJr   " S S\5      rS rg)a  
Porter Stemmer

This is the Porter stemming algorithm. It follows the algorithm
presented in

Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130-137.

with some optional deviations that can be turned on or off with the
`mode` argument to the constructor.

Martin Porter, the algorithm's inventor, maintains a web page about the
algorithm at

    https://www.tartarus.org/~martin/PorterStemmer/

which includes another Python implementation and other implementations
in many languages.
	plaintext    N)StemmerIc                       \ rS rSrSrSrSrSr\4S jrS r	S r
S	 rS
 rS rS rS rS rS rS rS rS rS rS rS rS rSS jrS rSrg)PorterStemmer   a  
A word stemmer based on the Porter stemming algorithm.

    Porter, M. "An algorithm for suffix stripping."
    Program 14.3 (1980): 130-137.

See https://www.tartarus.org/~martin/PorterStemmer/ for the homepage
of the algorithm.

Martin Porter has endorsed several modifications to the Porter
algorithm since writing his original paper, and those extensions are
included in the implementations on his website. Additionally, others
have proposed further improvements to the algorithm, including NLTK
contributors. There are thus three modes that can be selected by
passing the appropriate constant to the class constructor's `mode`
attribute:

- PorterStemmer.ORIGINAL_ALGORITHM

    An implementation that is faithful to the original paper.

    Note that Martin Porter has deprecated this version of the
    algorithm. Martin distributes implementations of the Porter
    Stemmer in many languages, hosted at:

    https://www.tartarus.org/~martin/PorterStemmer/

    and all of these implementations include his extensions. He
    strongly recommends against using the original, published
    version of the algorithm; only use this mode if you clearly
    understand why you are choosing to do so.

- PorterStemmer.MARTIN_EXTENSIONS

    An implementation that only uses the modifications to the
    algorithm that are included in the implementations on Martin
    Porter's website. He has declared Porter frozen, so the
    behaviour of those implementations should never change.

- PorterStemmer.NLTK_EXTENSIONS (default)

    An implementation that includes further improvements devised by
    NLTK contributors or taken from other modified implementations
    found on the web.

For the best stemming, you should use the default NLTK_EXTENSIONS
version. However, if you need to get the same results as either the
original algorithm or one of Martin Porter's hosted versions for
compatibility with an existing implementation or dataset, you can use
one of the other modes instead.
NLTK_EXTENSIONSMARTIN_EXTENSIONSORIGINAL_ALGORITHMc                 \   UU R                   U R                  U R                  4;  a  [        S5      eXl        U R                  U R                   :X  aH  SS/S/S/S/S/SS	/S
S/SS/S/S/S/S/S.n0 U l        U H  nX#    H  nX0R
                  U'   M     M     [        / SQ5      U l        g )NzwMode must be one of PorterStemmer.NLTK_EXTENSIONS, PorterStemmer.MARTIN_EXTENSIONS, or PorterStemmer.ORIGINAL_ALGORITHMskyskiesdyinglyingtyingnewsinningsinningoutingsoutingcanningscanninghoweproceedexceedsucceed)r   dielietier   r   r   r   r   r   r   r   )aeiou)r   r	   r
   
ValueErrormodepool	frozensetvowels)selfr%   irregular_formskeyvals        H/var/www/auris/envauris/lib/python3.13/site-packages/nltk/stem/porter.py__init__PorterStemmer.__init__V   s      ""##
 

 3  	99,,,
 w'yyy$h/$h/&	2%;#*%;O DI&*/C%(IIcN 0 '   9:    c                 z    X   U R                   ;   a  gX   S:X  a   US:X  a  gU R                  XS-
  5      (       + $ g)a  Returns True if word[i] is a consonant, False otherwise

A consonant is defined in the paper as follows:

    A consonant in a word is a letter other than A, E, I, O or
    U, and other than Y preceded by a consonant. (The fact that
    the term `consonant' is defined to some extent in terms of
    itself does not make it ambiguous.) So in TOY the consonants
    are T and Y, and in SYZYGY they are S, Z and G. If a letter
    is not a consonant it is a vowel.
Fyr   T   )r(   _is_consonant)r)   wordr!   s      r-   r4   PorterStemmer._is_consonant~   sC     7dkk!7c>Av--dE:::r0   c                     Sn[        [        U5      5       H%  nU R                  X5      (       a  US-  nM   US-  nM'     UR                  S5      $ )a  Returns the 'measure' of stem, per definition in the paper

From the paper:

    A consonant will be denoted by c, a vowel by v. A list
    ccc... of length greater than 0 will be denoted by C, and a
    list vvv... of length greater than 0 will be denoted by V.
    Any word, or part of a word, therefore has one of the four
    forms:

        CVCV ... C
        CVCV ... V
        VCVC ... C
        VCVC ... V

    These may all be represented by the single form

        [C]VCVC ... [V]

    where the square brackets denote arbitrary presence of their
    contents. Using (VC){m} to denote VC repeated m times, this
    may again be written as

        [C](VC){m}[V].

    m will be called the \measure\ of any word or word part when
    represented in this form. The case m = 0 covers the null
    word. Here are some examples:

        m=0    TR,  EE,  TREE,  Y,  BY.
        m=1    TROUBLE,  OATS,  TREES,  IVY.
        m=2    TROUBLES,  PRIVATE,  OATEN,  ORRERY.
 cvvc)rangelenr4   count)r)   stemcv_sequencer!   s       r-   _measurePorterStemmer._measure   sW    D  s4y!A!!$**s"s"	 "   &&r0   c                 *    U R                  U5      S:  $ )Nr   rA   )r)   r?   s     r-   _has_positive_measure#PorterStemmer._has_positive_measure   s    }}T"Q&&r0   c                 j    [        [        U5      5       H  nU R                  X5      (       a  M    g   g)z1Returns True if stem contains a vowel, else FalseTF)r<   r=   r4   )r)   r?   r!   s      r-   _contains_vowelPorterStemmer._contains_vowel   s.    s4y!A%%d.. " r0   c                     [        U5      S:  =(       a0    US   US   :H  =(       a    U R                  U[        U5      S-
  5      $ )zZImplements condition *d from the paper

Returns True if word ends with a double consonant
   r3   r=   r4   r)   r5   s     r-   _ends_double_consonant$PorterStemmer._ends_double_consonant   sF     IN 8RDH$8""4TQ7	
r0   c                    [        U5      S:  =(       a|    U R                  U[        U5      S-
  5      =(       aW    U R                  U[        U5      S-
  5      (       + =(       a-    U R                  U[        U5      S-
  5      =(       a    US   S;  =(       de    U R                  U R                  :H  =(       aE    [        U5      S:H  =(       a0    U R                  US5      (       + =(       a    U R                  US5      $ )zImplements condition *o from the paper

From the paper:

    *o  - the stem ends cvc, where the second c is not W, X or Y
          (e.g. -WIL, -HOP).
   rK   r3   rL   )wxr2   r   )r=   r4   r%   r   rO   s     r-   	_ends_cvcPorterStemmer._ends_cvc   s     IN 0""4TQ70&&tSY];;0 ""4TQ70 R/
 II--- ,D	Q,&&tQ//, ""4+	
r0   c                 t    UR                  U5      (       d   S5       eUS:X  a  X-   $ US[        U5      *  U-   $ )z-Replaces `suffix` of `word` with `replacementz(Given word doesn't end with given suffixr8   N)endswithr=   )r)   r5   suffixreplacements       r-   _replace_suffixPorterStemmer._replace_suffix   sE    }}V$$P&PP$R<%%3v;,'+55r0   c                    U H  nUu  pEnUS:X  a5  U R                  U5      (       a  USS nUb  U" U5      (       a  Xu-   s  $ Us  $ UR                  U5      (       d  M[  U R                  XS5      nUb  U" U5      (       a  Xu-   s  $ Us  $    U$ )aa  Applies the first applicable suffix-removal rule to the word

Takes a word and a list of suffix-removal rules represented as
3-tuples, with the first element being the suffix to remove,
the second element being the string to replace it with, and the
final element being the condition for the rule to be applicable,
or None if the rule is unconditional.
*dNrM   r8   )rP   rY   r\   )r)   r5   rulesrulerZ   r[   	conditionr?   s           r-   _apply_rule_listPorterStemmer._apply_rule_list   s     D-1*F~$"="=d"C"CCRy$	$--  K}}V$$++D"=$	$--  K " r0   c                     U R                   U R                  :X  a8  UR                  S5      (       a"  [        U5      S:X  a  U R	                  USS5      $ U R                  U/ SQ5      $ )aq  Implements Step 1a from "An algorithm for suffix stripping"

From the paper:

    SSES -> SS                         caresses  ->  caress
    IES  -> I                          ponies    ->  poni
                                       ties      ->  ti
    SS   -> SS                         caress    ->  caress
    S    ->                            cats      ->  cat
ies   ie))ssesssN)rf   r!   N)rj   rj   N)sr8   N)r%   r   rY   r=   r\   rc   rO   s     r-   _step1aPorterStemmer._step1a  s_     99,,,}}U##D	Q++D%>>$$
 	
r0   c                 Z  ^ ^ T R                   T R                  :X  aK  UR                  S5      (       a5  [        U5      S:X  a  T R	                  USS5      $ T R	                  USS5      $ UR                  S5      (       a/  T R	                  USS5      nT R                  U5      S:  a  US-   $ U$ S	nS
 HG  nUR                  U5      (       d  M  T R	                  XS5      mT R                  T5      (       d  ME  Sn  O   U(       d  U$ T R                  TSSSSTS   U4S j4SSU 4S j4/5      $ )am  Implements Step 1b from "An algorithm for suffix stripping"

From the paper:

    (m>0) EED -> EE                    feed      ->  feed
                                       agreed    ->  agree
    (*v*) ED  ->                       plastered ->  plaster
                                       bled      ->  bled
    (*v*) ING ->                       motoring  ->  motor
                                       sing      ->  sing

If the second or third of the rules in Step 1b is successful,
the following is done:

    AT -> ATE                       conflat(ed)  ->  conflate
    BL -> BLE                       troubl(ed)   ->  trouble
    IZ -> IZE                       siz(ed)      ->  size
    (*d and not (*L or *S or *Z))
       -> single letter
                                    hopp(ing)    ->  hop
                                    tann(ed)     ->  tan
                                    fall(ing)    ->  fall
                                    hiss(ing)    ->  hiss
                                    fizz(ed)     ->  fizz
    (m=1 and *o) -> E               fail(ing)    ->  fail
                                    fil(ing)     ->  file

The rule to map to a single letter causes the removal of one of
the double letter pair. The -E is put back on -AT, -BL and -IZ,
so that the suffixes -ATE, -BLE and -IZE can be recognised
later. This E may be removed in step 4.
iedrg   rh   r!   eedr8   r   eeF)edingT)atateN)blbleN)izizeNr_   rL   c                    > TS   S;  $ )NrL   )lrk   z )r?   intermediate_stems    r-   <lambda>'PorterStemmer._step1b.<locals>.<lambda>x  s    !22!6o!Mr0   r    c                 \   > TR                  U 5      S:H  =(       a    TR                  U 5      $ Nr3   )rA   rV   r?   r)   s    r-   r   r   ~  s$    $--"5":"St~~d?S"Sr0   )r%   r   rY   r=   r\   rA   rH   rc   )r)   r5   r?   rule_2_or_3_succeededrZ   r~   s   `    @r-   _step1bPorterStemmer._step1b/  sA   F 99,,,}}U##t9>//eTBB//eSAA ==''eR8D}}T"Q&d{" %#F}}V$$$($8$8r$J!''(9::,0) $ %K$$### %b)M T
 	
r0   c                    ^  U 4S jnU 4S jnT R                  USST R                  T R                  :X  a  U4/5      $ U4/5      $ )zImplements Step 1c from "An algorithm for suffix stripping"

From the paper:

Step 1c

    (*v*) Y -> I                    happy        ->  happi
                                    sky          ->  sky
c                 j   > [        U 5      S:  =(       a    TR                  U [        U 5      S-
  5      $ )aP  
This has been modified from the original Porter algorithm so
that y->i is only done when y is preceded by a consonant,
but not if the stem is only a single consonant, i.e.

   (*c and not c) Y -> I

So 'happy' -> 'happi', but
   'enjoy' -> 'enjoy'  etc

This is a much better rule. Formerly 'enjoy'->'enjoi' and
'enjoyment'->'enjoy'. Step 1c is perhaps done too soon; but
with this modification that no longer really matters.

Also, the removal of the contains_vowel(z) condition means
that 'spy', 'fly', 'try' ... stem to 'spi', 'fli', 'tri' and
conflate with 'spied', 'tried', 'flies' ...
r3   rN   r   s    r-   nltk_condition-PorterStemmer._step1c.<locals>.nltk_condition  s-    & t9q=LT%7%7c$i!m%LLr0   c                 &   > TR                  U 5      $ )N)rH   r   s    r-   original_condition1PorterStemmer._step1c.<locals>.original_condition  s    ''--r0   r2   r!   )rc   r%   r   )r)   r5   r   r   s   `   r-   _step1cPorterStemmer._step1c  sj    	M*	. $$   99(<(<< '	

 	
 0

 	
r0   c                   ^ ^ T R                   T R                  :X  a_  TR                  S5      (       aI  T R                  T R	                  TSS5      5      (       a"  T R                  T R	                  TSS5      5      $ SST R                  4nSST R                  4nSS	T R                  4S
ST R                  4SST R                  4SST R                  4SST R                  4T R                   T R                  :X  a  UOUSST R                  4SST R                  4SST R                  4SST R                  4SST R                  4SS	T R                  4SS	T R                  4SST R                  4SST R                  4SST R                  4S ST R                  4S!ST R                  4S"ST R                  4S#ST R                  4/nT R                   T R                  :X  a7  UR                  S$ST R                  45        UR                  S%S&U U4S' j45        T R                   T R                  :X  a  UR                  S%S&T R                  45        T R                  TU5      $ )(a=  Implements Step 2 from "An algorithm for suffix stripping"

From the paper:

Step 2

    (m>0) ATIONAL ->  ATE       relational     ->  relate
    (m>0) TIONAL  ->  TION      conditional    ->  condition
                                rational       ->  rational
    (m>0) ENCI    ->  ENCE      valenci        ->  valence
    (m>0) ANCI    ->  ANCE      hesitanci      ->  hesitance
    (m>0) IZER    ->  IZE       digitizer      ->  digitize
    (m>0) ABLI    ->  ABLE      conformabli    ->  conformable
    (m>0) ALLI    ->  AL        radicalli      ->  radical
    (m>0) ENTLI   ->  ENT       differentli    ->  different
    (m>0) ELI     ->  E         vileli        - >  vile
    (m>0) OUSLI   ->  OUS       analogousli    ->  analogous
    (m>0) IZATION ->  IZE       vietnamization ->  vietnamize
    (m>0) ATION   ->  ATE       predication    ->  predicate
    (m>0) ATOR    ->  ATE       operator       ->  operate
    (m>0) ALISM   ->  AL        feudalism      ->  feudal
    (m>0) IVENESS ->  IVE       decisiveness   ->  decisive
    (m>0) FULNESS ->  FUL       hopefulness    ->  hopeful
    (m>0) OUSNESS ->  OUS       callousness    ->  callous
    (m>0) ALITI   ->  AL        formaliti      ->  formal
    (m>0) IVITI   ->  IVE       sensitiviti    ->  sensitive
    (m>0) BILITI  ->  BLE       sensibiliti    ->  sensible
allir8   alblirw   abliableationalru   tionaltionencienceancianceizerry   entlientelir    ousliousizationationatoralismivenessivefulnessfulousnessalitiivitibilitifullilogilogc                 ,   > TR                  TS S 5      $ )N)rE   r?   r)   r5   s    r-   r   &PorterStemmer._step2.<locals>.<lambda>  s    T-G-GSb	-Rr0   )
r%   r   rY   rE   r\   _step2r
   appendr	   rc   )r)   r5   bli_rule	abli_ruler`   s   ``   r-   r   PorterStemmer._step2  se   < 99,,, }}V$$)C)C$$T626* * {{4#7#7fd#KLL5$"<"<=VT%?%?@	 t99:vt99:VT778VT778UD667d&=&==I8T4556eT778C334eT778t99:eT778UD667dD667t99:t99:t99:dD667eT778ud889)
. 99,,,LL'5$*D*DEF
 LL RS 99...LL&%)C)CDE$$T511r0   c                     U R                  USSU R                  4SSU R                  4SSU R                  4SSU R                  4SSU R                  4S	SU R                  4S
SU R                  4/5      $ )a  Implements Step 3 from "An algorithm for suffix stripping"

From the paper:

Step 3

    (m>0) ICATE ->  IC              triplicate     ->  triplic
    (m>0) ATIVE ->                  formative      ->  form
    (m>0) ALIZE ->  AL              formalize      ->  formal
    (m>0) ICITI ->  IC              electriciti    ->  electric
    (m>0) ICAL  ->  IC              electrical     ->  electric
    (m>0) FUL   ->                  hopeful        ->  hope
    (m>0) NESS  ->                  goodness       ->  good
icateicativer8   alizer   icitiicalr   ness)rc   rE   rO   s     r-   _step3PorterStemmer._step3  s     $$$ : :;"d889$ : :;$ : :;t99:D667T778
 	
r0   c                    ^  U 4S jnT R                  USSU4SSU4SSU4SSU4SSU4SSU4S	SU4S
SU4SSU4SSU4SSU4SSU 4S j4SSU4SSU4SSU4SSU4SSU4SSU4SSU4/5      $ )a&  Implements Step 4 from "An algorithm for suffix stripping"

Step 4

    (m>1) AL    ->                  revival        ->  reviv
    (m>1) ANCE  ->                  allowance      ->  allow
    (m>1) ENCE  ->                  inference      ->  infer
    (m>1) ER    ->                  airliner       ->  airlin
    (m>1) IC    ->                  gyroscopic     ->  gyroscop
    (m>1) ABLE  ->                  adjustable     ->  adjust
    (m>1) IBLE  ->                  defensible     ->  defens
    (m>1) ANT   ->                  irritant       ->  irrit
    (m>1) EMENT ->                  replacement    ->  replac
    (m>1) MENT  ->                  adjustment     ->  adjust
    (m>1) ENT   ->                  dependent      ->  depend
    (m>1 and (*S or *T)) ION ->     adoption       ->  adopt
    (m>1) OU    ->                  homologou      ->  homolog
    (m>1) ISM   ->                  communism      ->  commun
    (m>1) ATE   ->                  activate       ->  activ
    (m>1) ITI   ->                  angulariti     ->  angular
    (m>1) OUS   ->                  homologous     ->  homolog
    (m>1) IVE   ->                  effective      ->  effect
    (m>1) IZE   ->                  bowdlerize     ->  bowdler

The suffixes are now removed. All that remains is a little
tidying up.
c                 ,   > TR                  U 5      S:  $ r   rD   r   s    r-   r   &PorterStemmer._step4.<locals>.<lambda>=  s    DMM$$7!$;r0   r   r8   r   r   err   r   ibleantementmentr   ionc                 J   > TR                  U 5      S:  =(       a    U S   S;   $ )Nr3   rL   )rk   trD   r   s    r-   r   r   Q  s$    t!4q!8!ST"X=S!Sr0   ouismru   itir   r   ry   rc   )r)   r5   measure_gt_1s   `  r-   _step4PorterStemmer._step4!  s    8 <$$r<(\*\*r<(r<(\*\*L)"l+\*L) S
 r<(L)L)L)L)L)L)1
 	
r0   c                     UR                  S5      (       aW  U R                  USS5      nU R                  U5      S:  a  U$ U R                  U5      S:X  a  U R                  U5      (       d  U$ U$ )a  Implements Step 5a from "An algorithm for suffix stripping"

From the paper:

Step 5a

    (m>1) E     ->                  probate        ->  probat
                                    rate           ->  rate
    (m=1 and not *o) E ->           cease          ->  ceas
r    r8   r3   )rY   r\   rA   rV   )r)   r5   r?   s      r-   _step5aPorterStemmer._step5a]  sf    8 ==''c26D}}T"Q&}}T"a't0D0Dr0   c                 <   ^ ^ T R                  TSSU U4S j4/5      $ )zImplements Step 5a from "An algorithm for suffix stripping"

From the paper:

Step 5b

    (m > 1 and *d and *L) -> single letter
                            controll       ->  control
                            roll           ->  roll
llr{   c                 2   > TR                  TS S 5      S:  $ )NrL   r3   rD   r   s    r-   r   'PorterStemmer._step5b.<locals>.<lambda>  s    DMM$s),Dq,Hr0   r   rO   s   ``r-   _step5bPorterStemmer._step5b  s(     $$D#HIJ
 	
r0   c                    U(       a  UR                  5       OUnU R                  U R                  :X  a  XR                  ;   a  U R                  U   $ U R                  U R                  :w  a  [        U5      S::  a  U$ U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ )zG
:param to_lowercase: if `to_lowercase=True` the word always lowercase
rK   )lowerr%   r   r&   r
   r=   rl   r   r   r   r   r   r   r   )r)   r5   to_lowercaser?   s       r-   r?   PorterStemmer.stem  s      ,tzz|99,,,1B99T?"99///CIN K||D!||D!||D!{{4 {{4 {{4 ||D!||D!r0   c                     g)Nz<PorterStemmer>r}   )r)   s    r-   __repr__PorterStemmer.__repr__  s     r0   )r%   r&   r(   N)T)__name__
__module____qualname____firstlineno____doc__r   r	   r
   r.   r4   rA   rE   rH   rP   rV   r\   rc   rl   r   r   r   r   r   r   r   r?   r   __static_attributes__r}   r0   r-   r   r      s    2j (O+-+ &;P*1'f'	

*68
6R
h0
dN2`
8:
x"H
4!r0   r   c                      SSK Jn   SSKJn  U R	                  5       n/ n/ nUR                  5       SS  HN  nUR                  U5       H6  u  pgUR                  U5        UR                  UR                  U5      5        M8     MP     SR                  U5      n[        R                  " SSUS-   5      R                  5       nSR                  U5      n	[        R                  " SSU	S-   5      R                  5       n	[        S	R                  S
5      R                  SS5      R                  SS5      5        [        U	5        [        SR                  S
5      R                  SS5      R                  SS5      5        [        U5        [        S5        g)zR
A demonstration of the porter stemmer on a sample from
the Penn Treebank corpus.
r   )r?   )treebankNrS    z
(.{,70})\sz\1\nz
-Original-F   *-z	-Results-zF**********************************************************************)nltkr?   nltk.corpusr   r   fileidstagged_wordsr   joinresubrstripprintcenterreplace)
r?   r   stemmerorigstemmeditemr5   tagresultsoriginals
             r-   demor    sB    $  "GDG  "2A&!..t4IDKKNN7<<-. 5 ' hhwGff]GWs];BBDG xx~HvvmWhn=DDFH 
,

b
!
)
)#s
3
;
;C
EF	(O	+

R
 
(
(c
2
:
:3
DE	'N	(Or0   )r   __docformat__r   nltk.stem.apir   r   r  r}   r0   r-   <module>r     s+   (  	 "O
!H O
!dr0   