o
    rZhKC                     @   sZ  d Z ddlZddlmZ ddlmZ edZdSddZdTd
dZ	dd Z
dd ZdSddZdd Zdd Ze  eejZi ddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7i d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYi dZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{i d|d}d~dddddddddddddddddddddddddddddddi ddddddddddddddddddddddddddddddddddi ddddÓddœddǓddɓdd˓dd͓ddϓddѓddӓddՓddדddٓddۓddݓddߓddi ddddddddddddddddddddddddddddddd dddi dddddd	d
dddddddddddddddddddddd d!d"d#d$d%i d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGi dHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxZi dydzd{d|d}d~ddddddddddddddddddddddddddddi ddddddddddddddddddddddddddddddddddi dddddddÐdēdŐdƓdǐdȓdɐdʓdːd̓d͐dΓdϐdГdѐdғdӐdԓdՐd֓dאdؓdِdړdېdܓdݐdޓi dߐddddddddddddddddddddddddddddddddd i ddddddddd	d
dddddddddddddddddddddd d!d"i d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDi dEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfi dgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~ddddddddddi ddddddddddddddddddddddddddddddddddi dddddddddddddddddddddddddÐdēdŐdƓdǐdȓdɐdʓdːd̓i d͐dΓdϐdГdѐdғdӐdԓdՐd֓dאdؓdِdړdېdܓdݐdޓdߐdddddddddddddddi dddddddddddddddddd ddddddddd	d
ddddddi dddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2i d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTi dUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvi dwdxdydzd{d|d}d~ddddddddddddddddddddddddddi ddddddddddddddddddddddddddddddddddi dddddddddÐdēdŐdƓdǐdȓdɐdʓdːd̓d͐dΓdϐdГdѐdғdӐdԓdՐd֓dאdؓdِdړdېdܓi dݐdޓdߐdddddddddddddddddddddddddddddddi dd ddddddddd	d
dddddddddddddddddddddd i d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRZeeZeeZdS (U  a  
Translate between language names and language codes.

The iso639-3 language codes were downloaded from the registration authority at
https://iso639-3.sil.org/

The iso639-3 codeset is evolving, so retired language codes are kept in the
"iso639retired" dictionary, which is used as fallback by the wrapper functions
"langname" and "langcode", in order to support the lookup of retired codes.

The "langcode" function returns the current iso639-3 code if there is one,
and falls back to the retired code otherwise. As specified by BCP-47,
it returns the shortest (2-letter) code by default, but 3-letter codes
are also available:

    >>> import nltk.langnames as lgn
    >>> lgn.langname('fri')          #'fri' is a retired code
    'Western Frisian'

    The current code is different from the retired one:
    >>> lgn.langcode('Western Frisian')
    'fy'

    >>> lgn.langcode('Western Frisian', typ = 3)
    'fry'

    N)warn)bcp47z[a-z][a-z][a-z]?fullc                 C   s   |  d}|d  }t|rN|tv rt| S |tv r8t| }td|d|dd d|g|dd  } t	| }|d	krC|S |rL| d
d S dS td|dd dS )z
    Convert a composite BCP-47 tag to a language name

    >>> from nltk.langnames import langname
    >>> langname('ca-Latn-ES-valencia')
    'Catalan: Latin: Spain: Valencian'

    >>> langname('ca-Latn-ES-valencia', typ="short")
    'Catalan'
    -r   zShortening z to    
stacklevel   Nr   :zCould not find code in )
splitlowercodepattern	fullmatchiso639retirediso639shortr   joinr   name)tagtyptagscodecode2r    r   =/var/www/auris/lib/python3.10/site-packages/nltk/langnames.pylangname.   s    


r   r   c                 C   sV   | t jv rt j|  }|dkr|tv rt| }|S | tv r t|  S td| dd dS )ai  
    Convert language name to iso639-3 language code. Returns the short 2-letter
    code by default, if one is available, and the 3-letter code otherwise:

    >>> from nltk.langnames import langcode
    >>> langcode('Modern Greek (1453-)')
    'el'

    Specify 'typ=3' to get the 3-letter code:

    >>> langcode('Modern Greek (1453-)', typ=3)
    'ell'
       zCould not find language in r   r   N)r   langcode
iso639longiso639code_retiredr   )r   r   r   r   r   r   r   K   s   

r   c                 C   s
   t j|  S )z^
    Convert BCP-47 tag to Wikidata Q-code

    >>> tag2q('nds-u-sd-demv')
    'Q4289225'
    )r   wiki_q)r   r   r   r   tag2qi   s   
r    c                 C   s   t |  S )z^
    Convert Wikidata Q-code to BCP-47 tag

    >>> q2tag('Q4289225')
    'nds-u-sd-demv'
    )
wiki_bcp47)qcoder   r   r   q2tags   s   r#   c                 C   s   t t| |S )z
    Convert Wikidata Q-code to BCP-47 (full or short) language name

    >>> q2name('Q4289225')
    'Low German: Mecklenburg-Vorpommern'

    >>> q2name('Q4289225', "short")
    'Low German'
    )r   r#   )r"   r   r   r   r   q2name}   s   
r$   c                 C   s   t t| S )zd
    Convert simple language name to Wikidata Q-code

    >>> lang2q('Low German')
    'Q25433'
    )r    r   )r   r   r   r   lang2q   s   r%   c                 C   s:   t |  t t|  krdd |  D S td dS )z3Return inverse mapping, but only if it is bijectivec                 S   s   i | ]\}}||qS r   r   ).0keyvalr   r   r   
<dictcomp>   s    z inverse_dict.<locals>.<dictcomp>z1This dictionary has no bijective inverse mapping.N)lenkeyssetvaluesitemsr   )Zdicr   r   r   inverse_dict   s   r/   ZaarZaaZabkabZafrafZakaZakZamhamZaraarargZanasmasZavaavZaveZaeZaymZayZazeazZbakbaZbamZbmZbelbeZbenZbnZbisZbiZbodZboZbosbsZbrebrZbulbgcatcaZcescsZchachZcheZceZchucuZchvZcvZcorkwcoscoZcrecrZcymcyZdandaZdeudedivZdvZdzoZdzZellelZengenZepoeoZestetZeuseueweeeZfaofoZfasfaZfijfjZfinfiZfrafrZfryfyZfulffZglagdZglegaZglgglZglvgvZgrnZgnZgujguZhatZhtZhauZhaZhbsshZhebheZherhzZhinhiZhmoZhoZhrvhrZhunhuZhyehyZiboZigZidoioZiiiiiZikuiuZileieZinaiaindidZipkikZislisZitaitZjavZjvZjpnjakalklZkanknZkasksZkatkaZkaukrZkazkkZkhmkmZkikZkiZkinrwZkirkyZkomkvZkonZkgZkorkoZkuaZkjZkurZkuZlaoloZlatZlaZlavlvZlimZliZlinlnZlitltZltzZlbZlubZluZlugZlgZmahmhZmalmlmarmrZmkdmkZmlgZmgZmltmtmonZmnZmrimiZmsamsZmyaZmyZnaunaZnavnvZnblnrZndendZndongZnepneZnldnlZnnonnZnobnbZnornoZnyanyZociocZojiZojZoriorZormomZossospanpaZplipiZpolplZporptZpusZpsZqueZquZrohZrmZronrorunZrnZrusruZsagZsgsansasinsiZslkskZslvslZsmeseZsmosmZsnaZsnZsndsdZsomsoZsotstZspaesZsqisqZsrdscZsrpsrZsswsssunZsuZswaswZswesvZtahtyZtamtaZtattttelteZtgktgZtgltlZthathZtirtiZtontoZtsntnZtsotsZtukZtkZturtrZtwiZtwZuigZugukuruzveviZvowaZwoxhyiZyoZzazhzu)ZukrZurdZuzbZvenZvieZvolZwlnZwolZxhoZyidZyorZzhaZzhoZzulfrizWestern FrisianZauvZ	AuvergnatZgscZGasconZlmsZLimousinZlncZLanguedocienZprvu
   ProvençalZamdu   Amapá CreoleZbghZBoganZbnhu   BanawáZbvszBelgian Sign LanguageZccyzSouthern ZhuangZcitZChittagonianZflmz
Falam ChinZjapu   JaruáraZkobZKohoroxitariZmobZMoinbaZmzfZAikuZnhjzTlalitzlipa NahuatlZnhszSoutheastern Puebla NahuatlZoccZ
OccidentalZtmxZTomyangZtotzPatla-Chicontla TotonacZxmiu   MiarrãZyibZYinglishZztczLachirioag ZapotecZatfZAtuenceZbqezNavarro-Labourdin BasqueZbszzSouletin BasqueZaexZAmeraxZaheZAheZaizZAariZaknZAmikoanaZarfZArafundiZazrZAdzeraZbcxZPamonaZbiiZBisuZbkeZBengkuluZbluz
Hmong NjuaZboczBakung KenyahZbsdzSarawak BisayaZbwvzBahau River KenyahZbxtZBuxinhuaZbyuZBuyangZccxzNorthern ZhuangZcruu	   CarútanaZdatzDarang DengZdykz
Land DayakZeniZEnimZfizZIzeregenz
Geman DengZgghzGarreh-AjuranZituZItutangZkdszLahu ShiZknhzKayan River KenyahZkrgzNorth KorowaiZkrqZKruiZkxgZKatinganZlmtZLematangZlntZLintangZlodZBerawanZmbgu   Northern NambikuáraZmdozSouthwest GbayaZmhvZ	ArakaneseZmivZMimiZmqdZMadangZnkyzKhiamniungan NagaZnxjZNyaduZognZOganZorkZOrokaivaZpajzIpeka-TapuiaZpeczSouthern PesisirpenZPenesakZplmZ	PalembangZpojzLower PokomoZpunZPubianZraeZRanauZrjbZ	RajbanshiZrwsZRawasZsddZSemendoZsdizSindang KelingiZsklZSelakoZslbzKahumamahon SaluanZsrjZSerawaisufZTarpiaZsuhZSubaZsuuZSungkaiZszkZSizakiZtlezSouthern MarakwetZtnjZTanjongZttxzTutong 1ZubmzUpper Baram KenyahZvkyz
Kayu AgungZvmoz	Muko-MukoZwreZWareZxahZKahayanZxkmzMahakam KenyahZxufZKunfalZyiozDayao YiZymjzMuji YiZyplzPula YiZypwzPuwa YiZywmz	Wumeng YiZyymzYuanjiang-Mojiang YiZmlyzMalay (individual language)ZmuwZMundariZxstzSilt'eZopezOld PersianZsccZSerbianZscrZCroatianZxskZSakanZmolZ	MoldavianZaayZAariyaaccu   Cubulco AchíZcbmz Yepocapa Southwestern CakchiquelZchsZChumashZckczNorthern CakchiquelZckdzSouth Central CakchiquelZckezEastern CakchiquelZckfzSouthern CakchiquelZckiu!   Santa María De Jesús CakchiquelZckjz Santo Domingo Xenacoj CakchiquelZckkz"Acatenango Southwestern CakchiquelZckwzWestern CakchiquelZcnmu   Ixtatán ChujZctiz	Tila CholZcunu   Cunén QuichéemlzEmiliano-RomagnoloZeurZ	EuropantoZgmozGamo-Gofa-DawroZhsfzSoutheastern HuastecZhvau   San Luís Potosí HuastecZixiz
Nebaj IxilZixjzChajul IxilZjaizWestern JacaltecommszSouthern MamZmpfzTajumulco MamZmtzZTacanecZmvczCentral MamZmvju   Todos Santos Cuchumatán MamZpoazEastern PokomamZpobu   Western PokomchíZpouzSouthern PokomamZppvu   PapavôZquju   Joyabaj QuichéZqutu   West Central QuichéZquuu   Eastern QuichéZqxiu   San Andrés QuichéZsicZ	MalinguatZstcz
Santa CruzZtlzzToala'Ztzbu   Bachajón TzeltalZtzczChamula TzotzilZtzeu   Chenalhó TzotzilZtzsu   San Andrés Larrainzar TzotzilZtztzWestern TzutujilZtzuu   Huixtán TzotzilZtzzu   Zinacantán TzotzilZvlrZVatrataZyuszChan Santa Cruz MayaZnfgZNyengZnfkZShakaraZagpZParananZbhkzAlbay BicolanoZbkbZFinalligZbtbzBeti (Cameroon)ZcjrZ	ChorotegaZcmkZChimakumZdrhZDarkhatZdrwZDarwaziZgavZ	GabutamonZmofzMohegan-Montauk-NarragansettZmstzCataelano MandayaZmytzSangab MandayaZrmru   CalóZsglzSanglechi-IshkashimiZsulZ
SurigaononsumzSumo-MayangnaZtnfZ	TangshewiZwgwZWagawagaZayxzAyi (China)ZbjqzSouthern Betsimisaraka MalagasyZdhazDhanwar (India)ZdklzKolum So DogonZmjaZMaheiZnbfZNaxiZnooZNootkaZtieZTingalZtkkZTakpaZbazZTunenZbjdZ
BandjigaliZccqZ	ChaungthaZckazKhumi Awa ChinZdapzNisi (India)ZdwlzWalo Kumbe DogonZelpZ	ElpaputihZgbcZGarawaZgioZGelaoZhrrZHoruruZibiZIbilojarzJarawa (Nigeria)ZkdvZKadoZkghzUpper Tanudan KalingaZkppz
Paku KarenZkzhzKenuzi-DongolaZlcqZLuhuZmgxZOmatiZnlnzDurango NahuatlZpbzZPaluZpgyZPongyongZscaZSansuZtlwzSouth WemaleZunpZWororaZwiwZWiranguZybdZYangbyeyenZYendangZymaZYampheZdafZDanZdjlZDjiwarliZggrzAghu TharnggaluZilwZTalurZizizIzi-Ezaa-Ikwo-MgboZmegZMeaZmldZMalakhelZmntZMaykulanZmwdZMudburaZmyqzForest ManinkaZnbxZNguraZnlrZNgarlaZpcrZPanangZpprZPiruZtggZTanggaZwitZWintuZxiaZXiandaoZyiyz
Yir YorontZyosZYosZemoZEmokZggmz	Gugu MinilegZLenguaZlmmZLamamZmhhzMaskoy PidginZpuzz
Purum NagaZsapu	   SanapanáZyuuZYughZaamZAramanikZadpZAdapZaueu   ǂKxʼauǁʼeinZbmyz$Bemba (Democratic Republic of Congo)Zbxxz$Borna (Democratic Republic of Congo)ZbyyZBuyaZdzdZDazaZgfxu   Mangetti Dune ǃXungZgtizGbati-riZimeZ	ImeraguenZkbfZKakauhuaZkojz
Sara DunjoZkwqZKwakZkxeZKakihumZliiZLingkhimZmwjZMaligoZnnxZNgongZounu   ǃOǃungZpmuzMirpur PanjabiZsgoZSongaZthxZTheZtsfzSouthwestern TamangZuokZUokhaZxsjZSubiZydszYiddish Sign LanguageZymtzMator-Taygi-KaragasZynhZYanghoZbgmzBaga MboteniZbtlZBhatolaZcbeZ	ChipiajesZcbhZCaguaZcoyZCoyaimaZcquzChilean QuechuaZcumZCumeralZdujZDhuwalZggnzEastern GurungZggozSouthern GondiZguvZGeyZiapZIapamaZillZIranunZkgcZKassengZkoxZCoximaZktrzKota Marudu TinagasZkvsZKunggaraZkzjzCoastal KadazanZkztzTambunan DusunZnadZNijadalintsZ
NatagaimasZomeZOmejesZpmcZPalumataZpodZPonaresZppaZPaoZpryzPray 3ZrnaZRunaZsvrZSavaraZtduzTempasuk DusunZthczTai Hang TongtidZTidongtmpu	   Tai MèneZtnezTinoc KallahanZtoeZTomedesZxbazKamba (Brazil)Zxbxu   KabixíZxipu	   XipináwaZxkhZKarahawyanaZyriu   YaríZjegZJengZkgdZKataangZkrmZKrimZprbzLua'ZpukzPu KoZrieZRienZrsizRennellese Sign LanguageZskkZSokZsnhZShinaboZlsgzLyons Sign LanguageZmwxZMediakZmwyZMosiroZncpZNdaktupZaiszNataoran AmisZasdZAsasZditZDirariZdudz	Hun-SaareZlbaZLuiZlloZKhlorZmydZMarambaZmyizMina (India)ZnnsZNingyeZaohZArmaZayyzTayabas AytaZbbzzBabalia Creole ArabicZbpbZ	BarbacoasZccaZCaucaZcdgZChamariZdguZDegaruZdrrZDororoZekczEastern KarnicZgliZGuliguliZkjfZKhalajZkxlzNepali KuruxZkxuzKui (India)ZlmzZLumbeeZnxuZNarauZplpZPalpaZsdmZ	SemandangZtbbZTapebaZxrqZKarrangaZxtzZ	TasmanianZzirZZiriyaZthwZThudamZbicZBikaruZbijzVaghat-Ya-Bijim-LegeriZblgZBalauZgjiZGejiZMuyaZNgoniZ	PapitalaizIja-ZubaZWarapuzJudeo-Tunisian ArabicZ
ChungmbokozLaka (Nigeria)zLango (South Sudan)ZPiniZSamaZSebuyauzKulon-PazehZWardujiZWyandot)ZmvmZngopatZvkiZwraZajtZcugZlaklnoZpiiZsmdZsnbZuunZwrdZwya)r   )r   )__doc__rewarningsr   Znltk.corpusr   compiler   r   r   r    r#   r$   r%   r/   Zload_wiki_qr   r!   r   r   r   r   r   r   r   r   <module>   s  	



	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~                 	  
                                               !  "  #  $  %  &  '  (  )  *  +  , 
 =	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~                 	  
                                               !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?  @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _  `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~                                     	    
                                                                                             !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /    0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?    @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O    P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _    `    a    b    c    d    e    f    g    h  
  z