
    eTh2                        S SK Jr  S SKrS SKJr  S SKJrJr  \(       a  SSKJ	r	   " S S5      r
 " S	 S
\
5      r " S S\5      r " S S\5      rg)    )annotationsN)Queue)TYPE_CHECKINGOptional   )AutoTokenizerc                  $    \ rS rSrSrS rS rSrg)BaseStreamer   z?
Base class from which `.generate()` streamers should inherit.
c                    [        5       e)z;Function that is called by `.generate()` to push new tokensNotImplementedErrorselfvalues     Y/var/www/auris/envauris/lib/python3.13/site-packages/transformers/generation/streamers.pyputBaseStreamer.put        !##    c                    [        5       e)zHFunction that is called by `.generate()` to signal the end of generationr   r   s    r   endBaseStreamer.end$   r   r    N)__name__
__module____qualname____firstlineno____doc__r   r   __static_attributes__r   r   r   r
   r
      s    $$r   r
   c                  F    \ rS rSrSrS
SS jjrS rS rS
SS jjrS r	Sr
g	)TextStreamer)   a  
Simple text streamer that prints the token(s) to stdout as soon as entire words are formed.

<Tip warning={true}>

The API for the streamer classes is still under development and may change in the future.

</Tip>

Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenized used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

    >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
    >>> streamer = TextStreamer(tok)

    >>> # Despite returning the usual output, the streamer will also print the generated text to stdout.
    >>> _ = model.generate(**inputs, streamer=streamer, max_new_tokens=20)
    An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,
    ```
c                R    Xl         X l        X0l        / U l        SU l        SU l        g )Nr   T)	tokenizerskip_promptdecode_kwargstoken_cache	print_lennext_tokens_are_prompt)r   r&   r'   r(   s       r   __init__TextStreamer.__init__K   s,    "&* &*#r   c                v   [        UR                  5      S:  a  UR                  S   S:  a  [        S5      e[        UR                  5      S:  a  US   nU R                  (       a  U R                  (       a  SU l        gU R
                  R                  UR                  5       5        U R                  R                  " U R
                  40 U R                  D6nUR                  S5      (       a  X R                  S n/ U l        SU l        O[        U5      S:  aO  U R                  [        US   5      5      (       a-  X R                  S nU =R                  [        U5      -  sl        O>X R                  UR                  S5      S-    nU =R                  [        U5      -  sl        U R!                  U5        g)	z]
Receives tokens, decodes them, and prints them to stdout as soon as they form entire words.
   r   z'TextStreamer only supports batch size 1FN
 )lenshape
ValueErrorr'   r+   r)   extendtolistr&   decoder(   endswithr*   _is_chinese_charordrfindon_finalized_text)r   r   textprintable_texts       r   r   TextStreamer.putU   sY    u{{aEKKNQ$6FGG!!HE ; ;*/D' 	/~~$$T%5%5L9K9KL ==!.."23N!DDNY]t44Sb]CC!.."23NNNc.11N "..4::c?Q3FGNNNc.11N~.r   c                   [        U R                  5      S:  aN  U R                  R                  " U R                  40 U R                  D6nXR
                  S n/ U l        SU l        OSnSU l        U R                  USS9  g)z;Flushes any remaining cache and prints a newline to stdout.r   N T)
stream_end)r3   r)   r&   r8   r(   r*   r+   r=   )r   r>   r?   s      r   r   TextStreamer.endw   sz     t 1$>>(()9)9PT=O=OPD!.."23N!DDNN&*#~$?r   c                ,    [        USU(       d  SOSS9  g)zNPrints the new text to stdout. If the stream is ending, also prints a newline.TrB   N)flushr   )printr   r>   rC   s      r   r=   TextStreamer.on_finalized_text   s    d$jBdCr   c                    US:  a  US::  dT  US:  a  US::  dH  US:  a  US::  d<  US:  a  US::  d0  US	:  a  US
::  d$  US:  a  US::  d  US:  a  US::  d  US:  a  US::  a  gg)z6Checks whether CP is the codepoint of a CJK character.i N  i  i 4  iM  i   iߦ i  i? i@ i i  i i   i  i  i TFr   )r   cps     r   r:   TextStreamer._is_chinese_char   sr     6\bFlfvg"-g"-g"-g"-fvg"-r   )r(   r+   r*   r'   r)   r&   NF)r&   'AutoTokenizer'r'   boolr>   strrC   rO   )r   r   r   r   r    r,   r   r   r=   r:   r!   r   r   r   r#   r#   )   s$    B+ /D@Dr   r#   c                  Z   ^  \ rS rSrSr S     S	U 4S jjjrS
SS jjrS rS rSr	U =r
$ )TextIteratorStreamer   a\  
Streamer that stores print-ready text in a queue, to be used by a downstream application as an iterator. This is
useful for applications that benefit from accessing the generated text in a non-blocking way (e.g. in an interactive
Gradio demo).

<Tip warning={true}>

The API for the streamer classes is still under development and may change in the future.

</Tip>

Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenized used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    timeout (`float`, *optional*):
        The timeout for the text queue. If `None`, the queue will block indefinitely. Useful to handle exceptions
        in `.generate()`, when it is called in a separate thread.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
    >>> from threading import Thread

    >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
    >>> streamer = TextIteratorStreamer(tok)

    >>> # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
    >>> generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
    >>> thread = Thread(target=model.generate, kwargs=generation_kwargs)
    >>> thread.start()
    >>> generated_text = ""
    >>> for new_text in streamer:
    ...     generated_text += new_text
    >>> generated_text
    'An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,'
    ```
c                `   > [         TU ]  " X40 UD6  [        5       U l        S U l        X0l        g N)superr,   r   
text_queuestop_signaltimeoutr   r&   r'   rZ   r(   	__class__s        r   r,   TextIteratorStreamer.__init__   s-     	A=A'r   c                    U R                   R                  XR                  S9  U(       a/  U R                   R                  U R                  U R                  S9  gg)\Put the new text in the queue. If the stream is ending, also put a stop signal in the queue.rZ   N)rX   r   rZ   rY   rH   s      r   r=   &TextIteratorStreamer.on_finalized_text   sD    D,,7OO 0 0$,,G r   c                    U $ rV   r   r   s    r   __iter__TextIteratorStreamer.__iter__       r   c                ~    U R                   R                  U R                  S9nXR                  :X  a
  [	        5       eU$ Nr`   )rX   getrZ   rY   StopIterationr   s     r   __next__TextIteratorStreamer.__next__   s6    ##DLL#9$$$/!Lr   )rY   rX   rZ   FNr&   rN   r'   rO   rZ   zOptional[float]rM   rP   )r   r   r   r   r    r,   r=   rc   rj   r!   __classcell__r\   s   @r   rS   rS      sG    +\ ae(7;N] H r   rS   c                  Z   ^  \ rS rSrSr S     S	U 4S jjjrS
SS jjrS rS rSr	U =r
$ )AsyncTextIteratorStreamer   a  
Streamer that stores print-ready text in a queue, to be used by a downstream application as an async iterator.
This is useful for applications that benefit from accessing the generated text asynchronously (e.g. in an
interactive Gradio demo).

<Tip warning={true}>

The API for the streamer classes is still under development and may change in the future.

</Tip>

Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenized used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    timeout (`float`, *optional*):
        The timeout for the text queue. If `None`, the queue will block indefinitely. Useful to handle exceptions
        in `.generate()`, when it is called in a separate thread.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Raises:
    TimeoutError: If token generation time exceeds timeout value.

Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, AsyncTextIteratorStreamer
    >>> from threading import Thread
    >>> import asyncio

    >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")

    >>> # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
    >>> async def main():
    ...     # Important: AsyncTextIteratorStreamer must be initialized inside a coroutine!
    ...     streamer = AsyncTextIteratorStreamer(tok)
    ...     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
    ...     thread = Thread(target=model.generate, kwargs=generation_kwargs)
    ...     thread.start()
    ...     generated_text = ""
    ...     async for new_text in streamer:
    ...         generated_text += new_text
    >>>     print(generated_text)
    >>> asyncio.run(main())
    An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,
    ```
c                   > [         TU ]  " X40 UD6  [        R                  " 5       U l        S U l        X0l        [        R                  " 5       U l        [        [        S5      U l
        g )NrZ   )rW   r,   asyncior   rX   rY   rZ   get_running_looploophasattrhas_asyncio_timeoutr[   s        r   r,   "AsyncTextIteratorStreamer.__init__  sP     	A=A!--/,,.	#*7I#> r   c                    U R                   R                  U R                  R                  U5        U(       a;  U R                   R                  U R                  R                  U R                  5        gg)r_   N)rv   call_soon_threadsaferX   
put_nowaitrY   rH   s      r   r=   +AsyncTextIteratorStreamer.on_finalized_text(  sL    		&&t'A'A4HII**4??+E+EtGWGWX r   c                    U $ rV   r   r   s    r   	__aiter__#AsyncTextIteratorStreamer.__aiter__.  re   r   c                $  #     U R                   (       a\  [        R                  " U R                  5       IS h  vN   U R                  R	                  5       I S h  vN nS S S 5      IS h  vN   O?[        R
                  " U R                  R	                  5       U R                  S9I S h  vN nWU R                  :X  a
  [        5       eU$  N Nr Nd! , IS h  vN  (       d  f       N9= f N@! [        R                   a    [        5       ef = f7frg   )	rx   rt   rZ   rX   rh   wait_forrY   StopAsyncIterationTimeoutErrorr   s     r   	__anext__#AsyncTextIteratorStreamer.__anext__1  s     	''"??4<<88"&//"5"5"77E 988 &..t/B/B/Ddll[[ ((((** 97 9888 \## 	!. 	!s   D5C. CC. CCC C. +C,?C. +C,,C. 0DC. CC. C)CC)%C. .DD)rx   rv   rY   rX   rZ   rl   rm   rM   rP   )r   r   r   r   r    r,   r=   r   r   r!   rn   ro   s   @r   rq   rq      sG    2j ae?(?7;?N]? ?Y r   rq   )
__future__r   rt   queuer   typingr   r   models.autor   r
   r#   rS   rq   r   r   r   <module>r      sQ     #   * +$ $v< vrD< DNU Ur   