
    ,h
"                     @   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 	 ej                            d           n# e$ r  ej        d           Y nw xY w	 ej                            d           n# e$ r  ej        d	           Y nw xY w G d
 d          ZdS )    N)	stopwords)sent_tokenizeword_tokenize)SnowballStemmer)pipelineztokenizers/punktpunktzcorpora/stopwordsr   c                   >    e Zd ZdZddZddZd ZddZdd
Zd Z	dS )TextProcessorz9Clase para procesar texto para facilitar la accesibilidadspanishc                 X    || _         t          |          | _        d| _        d| _        dS )z
        Inicializa el procesador de texto
        
        Args:
            language (str): Idioma para el procesamiento de texto
        N)languager   stemmer_summarizer_simplifierselfr   s     QD:\NESTOR\Documents\Tesis\Asistente inteligente\backend\models\text_processing.py__init__zTextProcessor.__init__   s2     !&x00      simplifyc                     |dk    r|                      |          S |dk    r|                     |          S |dk    r|                     |          S t          d|           )uv  
        Procesa el texto según la operación solicitada
        
        Args:
            text (str): Texto a procesar
            operation (str): Operación a realizar (simplify, summarize, highlight)
            
        Returns:
            dict: Resultado del procesamiento
            
        Raises:
            ValueError: Si la operación no es válida
        r   	summarize	highlightu   Operación no válida: )simplify_textsummarize_texthighlight_keywords
ValueError)r   text	operations      r   processzTextProcessor.process%   sx     
""%%d++++%%&&t,,,+%%**4000ByBBCCCr   c           	        
 	 | j         t          dd          | _         d

fdt          dt                    
          D             }g }|D ]<}|                      |dd	d
          }|                    |d         d                    =d                    |          S # t          $ r t          | j                  }g }|D ]@}t          || j                  }	t          |	          dk     r|                    |           A|sC|
                    t                     |dt          dt          |          dz                     }d                    |          cY S w xY w)u   
        Simplifica un texto para hacerlo más accesible
        
        Args:
            text (str): Texto a simplificar
            
        Returns:
            str: Texto simplificado
        Nztext2text-generationfacebook/bart-large-cnnmodeli  c                 *    g | ]}||z            S  r&   .0i
max_lengthr   s     r   
<listcomp>z/TextProcessor.simplify_text.<locals>.<listcomp>M   &    VVVQq|^,VVVr   r         Fr*   
min_length	do_samplegenerated_text r      key      )r   r   rangelenappendjoin	Exceptionr   r   r   sortmax)r   r   segmentssimplified_segmentssegmentresult	sentencessimplified_sentencessentencewordsr*   s    `        @r   r   zTextProcessor.simplify_text<   s   	2'#+,BJc#d#d#d  JVVVVVeAs4yy*6U6UVVVH"$# H H))'cb\a)bb#**6!95E+FGGGG88/000 	2 	2 	2%dT]CCCI#% % : :%hGGGu::??(//999 ( M3''''01K#aY9J2K2K1K'L$88011111!	2s   BB$ $B?E&%E&333333?c           	      ~   	 | j         t          dd          | _         dfdt          dt                              D             }g }|D ]<}|                      |dd	d
          }|                    |d         d                    =d                    |          S # t          $ r t          | j                  t          t          j        | j                            }i }D ]M}	t          |	| j                  D ]4}
|
                                }
|
|vr|
|vrd||
<   $||
xx         dz  cc<   5Ni }t                    D ]\\  }}	t          |	| j                  D ]@}
|
                                }
|
|v r&||vr||
         ||<   *||xx         ||
         z  cc<   A]t          dt!          t                    |z                      }t#          |                                d d          d|         }fdt#          |d           D             }d                    |          cY S w xY w)u  
        Resume un texto conservando la información esencial
        
        Args:
            text (str): Texto a resumir
            ratio (float): Proporción del texto original a mantener
            
        Returns:
            str: Texto resumido
        Nsummarizationr"   r#   i   c                 *    g | ]}||z            S r&   r&   r'   s     r   r+   z0TextProcessor.summarize_text.<locals>.<listcomp>z   r,   r   r   r-   r.   Fr/   summary_textr3   r4   r8   c                     | d         S Nr8   r&   xs    r   <lambda>z.TextProcessor.summarize_text.<locals>.<lambda>   s
    1Q4 r   Tr7   reversec                 &    g | ]\  }}|         S r&   r&   )r(   r)   _rE   s      r   r+   z0TextProcessor.summarize_text.<locals>.<listcomp>   s!    bbbtq!ilbbbr   c                     | d         S )Nr   r&   rP   s    r   rR   z.TextProcessor.summarize_text.<locals>.<lambda>   s    \]^_\` r   r6   )r   r   r:   r;   r<   r=   r>   r   r   setr   rH   r   lower	enumerater@   intsorteditems)r   r   ratiorA   summary_segmentsrC   rD   
stop_wordsword_frequenciesrG   wordsentence_scoresr)   num_sentencesbest_sentencesr*   rE   s    `             @@r   r   zTextProcessor.summarize_texth   s   0	,'#+OC\#]#]#]  JVVVVVeAs4yy*6U6UVVVH!# C C))'cb\a)bb ''q	.(ABBBB88,--- !	, !	, !	,%dT]CCCI Y_T];;<<J!% 8 8)(T]KKK 8 8D::<<D:--'77756,T22,T222a72228 !O(33 I I8)(T]KKK I ID::<<D///O331A$1GOA..+A...2B42HH...I  3s9~~'=#>#>??M#O$9$9$;$;Y]^^^_m`m_mnNbbbbvnR`R`7a7a7abbbN88N+++++C!	,s   BB% %FH<;H<   c                    t          || j                  }t          t          j        | j                            }i }|D ]t}t          || j                  D ][}|                                }||vrAt          |          dk    r.|                                r||vrd||<   K||xx         dz  cc<   \ut          |
                                d d          d|         }d |D             }|}	|D ]V}
d	t          j        |
          z   d	z   }t          j        |d
|
                                z   d
z   |	t          j                  }	W|	|dS )u2  
        Identifica y destaca palabras clave en un texto
        
        Args:
            text (str): Texto a analizar
            num_keywords (int): Número de palabras clave a destacar
            
        Returns:
            dict: Texto original con palabras clave y lista de palabras clave
        r4   r9   r8   c                     | d         S rO   r&   rP   s    r   rR   z2TextProcessor.highlight_keywords.<locals>.<lambda>   s
    !A$ r   TrS   Nc                     g | ]\  }}|S r&   r&   )r(   rb   rV   s      r   r+   z4TextProcessor.highlight_keywords.<locals>.<listcomp>   s    111WT1D111r   z\bz**)flags)highlighted_textkeywords)r   r   rX   r   rH   r   rY   r;   isalnumr\   r]   reescapesubupper
IGNORECASE)r   r   num_keywordsrE   r`   ra   rG   rb   rl   rk   keywordpatterns               r   r   z TextProcessor.highlight_keywords   s    "$???	7788
 ! 	4 	4H%hGGG 4 4zz||z))c$ii!mmm#33312(..(...!3...4 *0022PTUUUVcWcVcd11111   	u 	uGbi00058G!vgtgmmoo/E/LN^fhfsttt !1 
 
 	
r   c                 <    || _         t          |          | _        dS )z
        Cambia el idioma del procesador
        
        Args:
            language (str): Idioma para el procesamiento
        N)r   r   r   r   s     r   set_languagezTextProcessor.set_language   s     !&x00r   N)r   )r   )rI   )rf   )
__name__
__module____qualname____doc__r   r    r   r   r   rw   r&   r   r   r
   r
      s        CC       D D D D.*2 *2 *2X<, <, <, <,|)
 )
 )
 )
V1 1 1 1 1r   r
   )rn   nltknltk.corpusr   nltk.tokenizer   r   	nltk.stemr   transformersr   datafindLookupErrordownloadr
   r&   r   r   <module>r      sA   				  ! ! ! ! ! ! 6 6 6 6 6 6 6 6 % % % % % % ! ! ! ! ! !INN%&&&&   DM'INN&''''   DM+F1 F1 F1 F1 F1 F1 F1 F1 F1 F1s!   ? AAA6 6BB