o
    rZhR                     @   sj   d Z ddlZddlZddlZddlmZ ddlZddlm	Z	 G dd dZ
G dd dZG d	d
 d
ZdS )a  
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
    N)product)pairwisec                   @   s  e Zd ZdZdZdZdZdZh dZi deded	ed
edededededededededededededei dedededededededed ed!ed"ed#ed$ed%ed&ed'ed(ei d)ed*ed+ed,ed-ed.ed/ed0ed1ed2ed3ed4ed5ed6ed7ed8ed9eeeeeeeeeeeeeeeed:Z	d;d;d<d=d>d?d=d@Z
edAeej dBZg dCZdDdE ZdOdGdHZdPdJdKZdLdM ZdNS )QVaderConstantsz8
    A class to keep the Vader lists and constants.
    gn?gnҿg~jt?gGz>;   ZnopeZnothingzdaren'tZaintzain'tzmustn'tZarentZmustntzwasn'tneverZdespiteZhasntZhadntzdon'tZshantZneedntZneitherZwithoutzaren'tZdarentZshouldntnotZwasntnonezcouldn'tZhaventZdoesntZcantz	shouldn'tzisn'tzuh-uhZwontzcan'tZcannotzshan'tZdontzhadn'tzneedn'tzwouldn'tzweren'tZnorZwouldntzmightn'tZseldomZdidntZrarelyZuhuhZwerentzwon'tzdidn'tZnowherezdoesn'tzhasn'tZmightntzoughtn'tZoughtntZisntzhaven'tZcouldntZ
absolutelyZ	amazinglyZawfullyZ
completelyZconsiderablyZ	decidedlyZdeeplyZeffingZ
enormouslyZentirelyZ
especiallyZexceptionallyZ	extremelyZ
fabulouslyZflippingZflippinZfrickingZfrickinZfriggingZfrigginZfullyZfuckingZgreatlyZhellaZhighlyZhugelyZ
incrediblyZ	intenselyZmajorlymoreZmostZparticularlyZpurelyZquiteZreallyZ
remarkablysoZsubstantiallyZ
thoroughlyZtotallyZtremendouslyZuberZunbelievablyZ	unusuallyZutterlyveryZalmostZbarelyZhardlyzjust enoughzkind of)ZkindaZkindofzkind-oflesslittleZ
marginallyZoccasionallyZpartlyZscarcelyZslightlyZsomewhatzsort ofZsortaZsortofzsort-of         ?   g      )zthe shitzthe bombzbad assz
yeah rightzcut the mustardzkiss of deathzhand to mouth[]).!?,;:-'"z!!z!!!z??z???z?!?z!?!z?!?!z!?!?c                 C   s   d S N )selfr   r   C/var/www/auris/lib/python3.10/site-packages/nltk/sentiment/vader.py__init__   s   zVaderConstants.__init__Tc                    sn   | j  t fdd|D rdS |rtdd |D rdS t|D ]\}}| dkr4| dkr4 dS q!dS )z<
        Determine if input contains negation words
        c                 3   s    | ]	}|   v V  qd S r   lower.0wordZ	neg_wordsr   r   	<genexpr>       z)VaderConstants.negated.<locals>.<genexpr>Tc                 s   s    | ]	}d |  v V  qdS )zn'tNr!   r#   r   r   r   r'      r(   leastatF)NEGATEanyr   r"   )r   Zinput_wordsZ
include_ntfirstsecondr   r&   r   negated   s   zVaderConstants.negated   c                 C   s   |t || |  }|S )z|
        Normalize the score to be between -1 and 1 using an alpha that
        approximates the max expected value
        )mathsqrt)r   ZscorealphaZ
norm_scorer   r   r   	normalize   s   zVaderConstants.normalizec                 C   s`   d}|  }|| jv r.| j| }|dk r|d9 }| r.|r.|dkr)|| j7 }|S || j8 }|S )zh
        Check if the preceding words increase, decrease, or negate/nullify the
        valence
                r   )r"   BOOSTER_DICTisupperC_INCR)r   r%   valenceis_cap_diffZscalarZ
word_lowerr   r   r   scalar_inc_dec   s   



zVaderConstants.scalar_inc_decN)T)r0   )__name__
__module____qualname____doc__ZB_INCRB_DECRr9   N_SCALARr+   r7   SPECIAL_CASE_IDIOMSrecompileescapestringpunctuationREGEX_REMOVE_PUNCTUATION	PUNC_LISTr    r/   r4   r<   r   r   r   r   r   !   s   A	
 !"#$%&'()*+,-./01234G

r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )	SentiTextzL
    Identify sentiment-relevant string-level properties of input text.
    c                 C   sF   t |tst|d}|| _|| _|| _|  | _| | j| _	d S )Nzutf-8)

isinstancestrencodetextrJ   rI   _words_and_emoticonswords_and_emoticonsallcap_differentialr;   )r   rO   Z	punc_listZregex_remove_punctuationr   r   r   r      s   

zSentiText.__init__c                 C   sd   | j d| j}| }dd |D }dd t| j|D }dd t|| jD }|}|| |S )zt
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
         c                 S   s   h | ]
}t |d kr|qS    len)r$   wr   r   r   	<setcomp>#      z-SentiText._words_plus_punc.<locals>.<setcomp>c                 S      i | ]
}d  ||d qS )rS   rU   joinr$   pr   r   r   
<dictcomp>%  rZ   z.SentiText._words_plus_punc.<locals>.<dictcomp>c                 S   r[   )rS   r   r\   r^   r   r   r   r`   &  rZ   )rI   subrO   splitr   rJ   update)r   Zno_punc_textZ
words_onlyZpunc_beforeZ
punc_afterwords_punc_dictr   r   r   _words_plus_punc  s   
zSentiText._words_plus_puncc                 C   sJ   | j  }|  }dd |D }t|D ]\}}||v r"|| ||< q|S )z
        Removes leading and trailing puncutation
        Leaves contractions and most emoticons
            Does not preserve punc-plus-letter emoticons (e.g. :D)
        c                 S   s   g | ]
}t |d kr|qS rT   rV   )r$   wer   r   r   
<listcomp>3  rZ   z2SentiText._words_and_emoticons.<locals>.<listcomp>)rO   rb   re   	enumerate)r   Zwesrd   irf   r   r   r   rP   +  s   
zSentiText._words_and_emoticonsc                 C   sT   d}d}|D ]
}|  r|d7 }qt|| }d|  k r#t|k r(n |S d}|S )z
        Check whether just some words in the input are ALL CAPS

        :param list words: The words to inspect
        :returns: `True` if some but not all items in `words` are ALL CAPS
        Fr   rU   T)r8   rW   )r   wordsZis_differentZallcap_wordsr%   Zcap_differentialr   r   r   rR   9  s   zSentiText.allcap_differentialN)r=   r>   r?   r@   r    re   rP   rR   r   r   r   r   rK     s    rK   c                   @   s|   e Zd ZdZ	dddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )SentimentIntensityAnalyzerz8
    Give a sentiment intensity score to sentences.
    ;sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txtc                 C   s$   t j|| _|  | _t | _d S r   )nltkdataloadlexicon_filemake_lex_dictlexiconr   	constants)r   rp   r   r   r   r    P  s   
z#SentimentIntensityAnalyzer.__init__c                 C   s@   i }| j dD ]}| ddd \}}t|||< q|S )z6
        Convert lexicon file to a dictionary
        
	r   r   )rp   rb   stripfloat)r   Zlex_dictliner%   measurer   r   r   rq   X  s
   z(SentimentIntensityAnalyzer.make_lex_dictc                 C   s   t || jj| jj}g }|j}|D ]8}d}||}|t|d k r2| dkr2||d   dks:| | jjv r@|	| q| 
|||||}q| ||}| ||S )a  
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative value are negative
        valence.

        :note: Hashtags are not taken into consideration (e.g. #BAD is neutral). If you
            are interested in processing the text in the hashtags too, then we recommend
            preprocessing your data to remove the #, after which the hashtag text may be
            matched as if it was a normal word in the sentence.
        r   rU   kindZof)rK   rs   rJ   rI   rQ   indexrW   r"   r7   appendsentiment_valence
_but_checkscore_valence)r   rO   	sentitext
sentimentsrQ   itemr:   ri   r   r   r   polarity_scoresb  s"   

z*SentimentIntensityAnalyzer.polarity_scoresc                 C   s  |j }|j}| }|| jv r| j| }| r+|r+|dkr%|| jj7 }n|| jj8 }tddD ]P}	||	kr|||	d    | jvr| j|||	d   ||}
|	dkr]|
dkr]|
d }
|	dkri|
dkri|
d }
||
 }| 	|||	|}|	dkr| 
|||}q0| |||}|| |S )Nr   r   rU   gffffff?r   g?)r;   rQ   r"   rr   r8   rs   r9   ranger<   _never_check_idioms_check_least_checkr|   )r   r:   r   r   ri   r   r;   rQ   Zitem_lowercasestart_isr   r   r   r}     s>   

	
z,SentimentIntensityAnalyzer.sentiment_valencec                 C   s   |dkr5||d    | jvr5||d    dkr5||d    dkr3||d    dkr3|| jj }|S |dkrT||d    | jvrT||d    dkrT|| jj }|S )NrU   r)   r   r*   r
   r   )r"   rr   rs   rB   )r   r:   rQ   ri   r   r   r   r     s   z'SentimentIntensityAnalyzer._least_checkc                 C   sr   dd |D }dht |@ }|r7|tt|}t|D ]\}}||k r,|d ||< q||kr6|d ||< q|S )Nc                 S   s   g | ]}|  qS r   r!   )r$   Zw_er   r   r   rg     s    z9SentimentIntensityAnalyzer._but_check.<locals>.<listcomp>butg      ?r   )setr{   nextiterrh   )r   rQ   r   r   ZbiZsidxZ	sentimentr   r   r   r~     s   z%SentimentIntensityAnalyzer._but_checkc                 C   s  ||d   d||  }d ||d  ||d  || }||d   d||d   }d ||d  ||d  ||d  }d ||d  ||d  }|||||g}	|	D ]}
|
| jjv rg| jj|
 } nqWt|d |kr||  d||d   }|| jjv r| jj| }t|d |d krd || ||d  ||d  }|| jjv r| jj| }|| jjv s|| jjv r|| jj }|S )NrU    z{} {} {}r   r   z{} {})formatrs   rC   rW   r7   rA   )r   r:   rQ   ri   ZonezeroZ
twoonezeroZtwooneZthreetwooneZthreetwo	sequencesseqZzerooneZ
zeroonetwor   r   r   r     sJ   






z(SentimentIntensityAnalyzer._idioms_checkc                 C   s   |dkr| j ||d  gr|| j j }|dkrI||d  dkr6||d  dks1||d  dkr6|d }n| j |||d   grI|| j j }|dkr||d  dkre||d  dksu||d  dksu||d  dksu||d  dkr{|d	 }|S | j |||d   gr|| j j }|S )
Nr   rU   r   r   r	   thisr   r   g      ?)rs   r/   rB   )r   r:   rQ   r   ri   r   r   r   r     s*   
z'SentimentIntensityAnalyzer._never_checkc                 C   s    |  |}| |}|| }|S r   )_amplify_ep_amplify_qm)r   sum_srO   ep_amplifierqm_amplifierpunct_emph_amplifierr   r   r   _punctuation_emphasis$  s   

z0SentimentIntensityAnalyzer._punctuation_emphasisc                 C   s"   | d}|dkrd}|d }|S )Nr      g㥛 ?count)r   rO   Zep_countr   r   r   r   r   +  s
   
z&SentimentIntensityAnalyzer._amplify_epc                 C   s2   | d}d}|dkr|dkr|d }|S d}|S )Nr   r   rU   r   g
ףp=
?gQ?r   )r   rO   Zqm_countr   r   r   r   r   5  s   
z&SentimentIntensityAnalyzer._amplify_qmc                 C   s`   d}d}d}|D ]"}|dkr|t |d 7 }|dk r"|t |d 7 }|dkr*|d7 }q|||fS )Nr5   r   rU   )rw   )r   r   pos_sumneg_sum	neu_countZsentiment_scorer   r   r   _sift_sentiment_scoresB  s    


z1SentimentIntensityAnalyzer._sift_sentiment_scoresc                 C   s   |rct t|}| ||}|dkr||7 }n|dk r||8 }| j|}| |\}}}|t|kr9||7 }n|t|k rD||8 }|t| | }	t||	 }
t||	 }t||	 }nd}d}
d}d}t|dt|dt|
dt|dd}|S )Nr   r5   r   r   )negneuposcompound)	rw   sumr   rs   r4   r   r1   fabsround)r   r   rO   r   r   r   r   r   r   totalr   r   r   Zsentiment_dictr   r   r   r   T  s6   

z(SentimentIntensityAnalyzer.score_valenceN)rl   )r=   r>   r?   r@   r    rq   r   r}   r   r~   r   r   r   r   r   r   r   r   r   r   r   rk   K  s     

"21
rk   )r@   r1   rD   rG   	itertoolsr   Z	nltk.datarm   Z	nltk.utilr   r   rK   rk   r   r   r   r   <module>   s    gD