
    /h/                     2    S r SSKrSSKJr   " S S\5      rg)z
A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm.
Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61.
    N)StemmerIc                   Z    \ rS rSrSrSrSS jrSS jrS rS r	S	 r
S
 rS rS rS rSrg)LancasterStemmer   a  
Lancaster Stemmer

    >>> from nltk.stem.lancaster import LancasterStemmer
    >>> st = LancasterStemmer()
    >>> st.stem('maximum')     # Remove "-um" when word is intact
    'maxim'
    >>> st.stem('presumably')  # Don't remove "-um" when word is not intact
    'presum'
    >>> st.stem('multiply')    # No action taken if word ends with "-ply"
    'multiply'
    >>> st.stem('provision')   # Replace "-sion" with "-j" to trigger "j" set of rules
    'provid'
    >>> st.stem('owed')        # Word starting with vowel must contain at least 2 letters
    'ow'
    >>> st.stem('ear')         # ditto
    'ear'
    >>> st.stem('saying')      # Words starting with consonant must contain at least 3
    'say'
    >>> st.stem('crying')      #     letters and one of those letters must be a vowel
    'cry'
    >>> st.stem('string')      # ditto
    'string'
    >>> st.stem('meant')       # ditto
    'meant'
    >>> st.stem('cement')      # ditto
    'cem'
    >>> st_pre = LancasterStemmer(strip_prefix_flag=True)
    >>> st_pre.stem('kilometer') # Test Prefix
    'met'
    >>> st_custom = LancasterStemmer(rule_tuple=("ssen4>", "s1t."))
    >>> st_custom.stem("ness") # Change s to t
    'nest'
)szai*2.za*1.zbb1.zcity3s.zci2>zcn1t>zdd1.zdei3y>zdeec2ss.zdee1.zde2>zdooh4>ze1>zfeil1v.zfi2>zgni3>zgai3y.zga2>zgg1.zht*2.z	hsiug5ct.zhsi3>zi*1.zi1y>zji1d.zjuf1s.zju1d.zjo1d.zjeh1r.zjrev1t.zjsim2t.zjn1d.zj1s.zlbaifi6.zlbai4y.zlba3>zlbi3.zlib2l>zlc1.zlufi4y.zluf3>zlu2.zlai3>zlau3>zla2>zll1.zmui3.zmu*2.zmsi3>zmm1.znois4j>znoix4ct.znoi3>znai3>zna2>znee0.zne2>znn1.zpihs4>zpp1.zre2>zrae0.zra2.zro2>zru2>zrr1.zrt1>zrei3y>zsei3y>zsis2.zsi2>zssen4>zss0.zsuo3>zsu*2.zs*1>zs0.z	tacilp4y.zta2>ztnem4>ztne3>ztna3>ztpir2b.ztpro2b.ztcud1.ztpmus2.ztpec2iv.ztulo2v.ztsis0.ztsi3>ztt1.zuqi3.zugo1.zvis3j>zvie0.zvi2>zylb1>zyli3y>zylp0.zyl2>zygo1.zyhp1.zymo1.zypo1.zyti3>zyte3>zytl2.zyrtsi5.zyra3>zyro3>zyfi3.zycn2t>zyca3>zzi2>zzy1s.Nc                 \    0 U l         X l        U(       a  Xl        gU R                  U l        g)z,Create an instance of the Lancaster stemmer.N)rule_dictionary_strip_prefixdefault_rule_tuple_rule_tuple)self
rule_tuplestrip_prefix_flags      K/var/www/auris/envauris/lib/python3.13/site-packages/nltk/stem/lancaster.py__init__LancasterStemmer.__init__   s'      ".)3:9P9P    c                 L   U(       a  UOU R                   n[        R                  " S5      n0 U l        U Hl  nUR	                  U5      (       d  [        SU S35      eUSS nX@R                  ;   a   U R                  U   R                  U5        M\  U/U R                  U'   Mn     g)a  Validate the set of rules used in this stemmer.

If this function is called as an individual method, without using stem
method, rule_tuple argument will be compiled into self.rule_dictionary.
If this function is called within stem, self._rule_tuple will be used.

z^[a-z]+\*?\d[a-z]*[>\.]?$z	The rule z is invalidr      N)r   recompiler   match
ValueErrorappend)r   r   
valid_rulerulefirst_letters        r   
parseRulesLancasterStemmer.parseRules   s     $.Z43C3C
ZZ <=
!D##D)) 9TF+!>??!9L333$$\299$?6:V$$\2 r   c                     UR                  5       nU R                  (       a  U R                  U5      OUnUnU R                  (       d  U R	                  5         U R                  X5      $ )z(Stem a word using the Lancaster stemmer.)lowerr	   _LancasterStemmer__stripPrefixr   r   _LancasterStemmer__doStemming)r   wordintact_words      r   stemLancasterStemmer.stem   sV     zz|+/+=+=t!!$'4  ##OO  33r   c                    [         R                  " S5      nSnU(       Ga)  U R                  U5      nUS:  d  X   U R                  ;  a  SnOSnU R                  X       H  nUR	                  U5      nU(       d  M  UR                  5       u  n	n
nnn[        U5      nUR                  U	SSS2   5      (       d  M\  U
(       a=  X:X  a6  U R                  X5      (       a  U R                  XU5      nSnUS:X  a  Sn  O<M  M  U R                  X5      (       d  M  U R                  XU5      nSnUS:X  a  Sn  O   US:X  a  SnU(       a  GM)  U$ )z Perform the actual word stemmingz#^([a-z]+)(\*?)(\d)([a-z]*)([>\.]?)$Tr   FN.)
r   r    _LancasterStemmer__getLastLetterr   r   groupsintendswith_LancasterStemmer__isAcceptable_LancasterStemmer__applyRule)r   r#   r$   r   proceedlast_letter_positionrule_was_appliedr   
rule_matchending_stringintact_flagremove_totalappend_string	cont_flags                 r   __doStemmingLancasterStemmer.__doStemming   s    ZZ FG
#'#7#7#=  %q(-T5I5II $)  !001KLD!+!1!1$!7J!z '--/)'()%
 (+<'8  ==tt)<==*#'#64;N;N$(<" <" ,0+;+;(,M,&D 8<$4'0C'727$)<"#6 "&!4!4T!H!H'+'7'7$((" 48 0#,#3.3G %G MJ $u,#Gk gl r   c                 x    Sn[        [        U5      5       H  nX   R                  5       (       a  UnM    U$    U$ )zHGet the zero-based index of the last alphabetic character in this stringr(   )rangelenisalpha)r   r#   last_letterpositions       r   __getLastLetter LancasterStemmer.__getLastLetter  sA    c$i(H~%%''& )
 r   c                     SnUS   S;   a  [        U5      U-
  S:  a  SnU$ [        U5      U-
  S:  a  US   S;   a  SnU$ US   S;   a  SnU$ )z1Determine if the word is acceptable for stemming.Fr   aeiouy   T   r   r=   )r   r#   r6   word_is_acceptables       r   __isAcceptableLancasterStemmer.__isAcceptable$  s|    " 7h4y<'1,%)" "! Y%*Aw("%)" "! aH$%)"!!r   c                 B    [        U5      U-
  nUSU nU(       a  X-  nU$ )z#Apply the stemming rule to the wordr   rG   )r   r#   r6   r7   new_word_lengths        r   __applyRuleLancasterStemmer.__applyRule5  s0     d)l2Ao& !Dr   c                 d    S H)  nUR                  U5      (       d  M  U[        U5      S s  $    U$ )zIRemove prefix from a word.

This function originally taken from Whoosh.

)	kilomicromilliintraultramegananopicopseudoN)
startswithr=   )r   r#   prefixs      r   __stripPrefixLancasterStemmer.__stripPrefix@  s7    

F v&&CKM**

 r   c                     g)Nz<LancasterStemmer> )r   s    r   __repr__LancasterStemmer.__repr__U  s    #r   )r   r	   r   )NF)N)__name__
__module____qualname____firstlineno____doc__r
   r   r   r%   r"   r*   r.   r/   r!   r_   __static_attributes__r^   r   r   r   r      sA    !HtlQ<.4=~""	*$r   r   )re   r   nltk.stem.apir   r   r^   r   r   <module>rh      s     
 "E$x E$r   