o
    ZhT                     @   sv  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dl m	Z	 d dl
mZmZmZmZmZmZmZ d dlmZ dd	lmZ dd
lmZmZmZ eeZe rid dlZd dlmZ d dl m!Z! ndZe rtd dl"m#Z# e r}d dl$m%Z% e&e'e(e)ee*ddfZ+e,dej-Z.e,dej-Z/e,dej-ej0B Z1e,dej-Z2G dd de3Z4G dd de3Z5de(de6e(e(f fddZ7de(de6fddZ8dede6fd d!Z9d"e(de:ee( ee6 ee( f fd#d$Z;dede6fd%d&Z<d'd( Z=ed)d* Z>				+	+	+d5d,e?e?e6e(e(f   d-ee?ee6ef   d.ee?e6e(e(f   d/ee( d0ee) d1ee) d2ee) de(fd3d4Z@dS )6    N)contextmanager)datetime)	lru_cache)
isfunction)AnyCallableOptionalUnionget_args
get_originget_type_hints)version   )logging)is_jinja_availableis_torch_availableis_vision_available)	Extension)ImmutableSandboxedEnvironment)Image)Tensor.z(^(.*?)[\n\s]*(Args:|Returns:|Raises:|\Z)z0\n\s*Args:\n\s*(.*?)[\n\s]*(Returns:|Raises:|\Z)a1  
(?:^|\n)  # Match the start of the args block, or a newline
\s*(\w+):\s*  # Capture the argument name and strip spacing
(.*?)\s*  # Capture the argument description, which can span multiple lines, and strip trailing spacing
(?=\n\s*\w+:|\Z)  # Stop when you hit the next argument or the end of the block
z*\n\s*Returns:\n\s*(.*?)[\n\s]*(Raises:|\Z)c                   @      e Zd ZdZdS )TypeHintParsingExceptionzJException raised for errors in parsing type hints to generate JSON schemasN__name__
__module____qualname____doc__ r   r   U/var/www/auris/lib/python3.10/site-packages/transformers/utils/chat_template_utils.pyr   B       r   c                   @   r   )DocstringParsingExceptionzJException raised for errors in parsing docstrings to generate JSON schemasNr   r   r   r   r   r!   H   r    r!   
param_typereturnc                 C   sh   t dditdditdditdditd dditi i}t r#ddi|t< t r,ddi|t	< |
| dd	iS )
NtypeintegernumberstringbooleannullimageZaudioobject)intfloatstrboolr$   r   r   r   r   r   get)r"   Ztype_mappingr   r   r   _get_json_schema_typeN   s   r1   hintc                 C   s  t | }t| }|d u rzt| W S  ty   td| w |tu s,ttdrc|tju rcdd |D }t	|dkr>|d }nt
dd |D rSd	td
d |D i}nd|i}td |v rad|d< |S |tu rv|smd	diS dt|d dS |tu r|sd	diS t	|dkrtdt| dd dd|v rtdddd |D dS |tu rd	di}t	|dkrt|d |d< |S td| )NzGCouldn't parse this type hint, likely due to a custom class or object: 	UnionTypec                 S   s    g | ]}|t d urt|qS N)r$   _parse_type_hint.0tr   r   r   
<listcomp>l   s     z$_parse_type_hint.<locals>.<listcomp>r   r   c                 s   s    | ]
}t |d  tV  qdS )r$   N)
isinstancer.   r7   subtyper   r   r   	<genexpr>p   s    z#_parse_type_hint.<locals>.<genexpr>r$   c                 S   s   g | ]}|d  qS )r$   r   r;   r   r   r   r9   r       ZanyOfTZnullablearray)r$   itemszThe type hint ztyping. a1   is a Tuple with a single element, which we do not automatically convert to JSON schema as it is rarely necessary. If this input can contain more than one element, we recommend using a List[] type instead, or if it really is a single element, remove the Tuple[] wrapper and just pass the element directly..znConversion of '...' is not supported in Tuple type hints. Use List[] types for variable-length inputs instead.c                 S   s   g | ]}t |qS r   )r5   r6   r   r   r   r9      r>   )r$   ZprefixItemsr+      ZadditionalProperties)r   r
   r1   KeyErrorr   r	   hasattrtypesr3   lenallsortedr$   listr5   tupler.   replacedict)r2   originargssubtypesreturn_dictoutr   r   r   r5   ^   sT   


r5   funcc           	      C   s   t | }t| }g }|j D ]#\}}|jtjjkr'td|j	 d| j
 |jtjjkr3|| qi }| D ]
\}}t|||< q:d|d}|rP||d< |S )Nz	Argument z$ is missing a type hint in function r+   )r$   
propertiesrequired)r   inspect	signature
parametersr@   
annotation	Parameteremptyr   namer   defaultappendr5   )	rR   Z
type_hintsrV   rT   
param_nameparamrS   r"   schemar   r   r   "_convert_type_hints_to_json_schema   s    


ra   	docstringc           	      C   s   t | }t| }t| }|r|d nd}|r#|d nd}|r.|d nd}|durNddd |dD }t	|}dd |D }ni }|||fS )a  
    Parses a Google-style docstring to extract the function description,
    argument descriptions, and return description.

    Args:
        docstring (str): The docstring to parse.

    Returns:
        The function description, arguments, and return description.
    r   N
c                 S   s   g | ]}|  r|qS r   strip)r7   liner   r   r   r9      s    z1parse_google_format_docstring.<locals>.<listcomp>c              	   S   s(   i | ]}|d  t dd|d  qS )r   z	\s*\n+\s* r   )resubre   )r7   matchr   r   r   
<dictcomp>   s   ( z1parse_google_format_docstring.<locals>.<dictcomp>)
description_researchargs_re
returns_regroupre   joinsplitargs_split_refindall)	rb   Zdescription_matchZ
args_matchZreturns_matchdescriptionZdocstring_argsreturnsmatchesZ	args_dictr   r   r   parse_google_format_docstring   s   




rx   c                 C   s  t | }|std| j d| }t|\}}}t| }|d dd }dur3|dur3||d< |d  D ]A\}}||vrMtd| j d| d|| }	t	j
d	|	t	jd
}
|
rvdd t|
dD |d< |
jd|
   }	|	|d< q9| j||d}|dur||d< d|dS )a  
    This function generates a JSON schema for a given function, based on its docstring and type hints. This is
    mostly used for passing lists of tools to a chat template. The JSON schema contains the name and description of
    the function, as well as the names, types and descriptions for each of its arguments. `get_json_schema()` requires
    that the function has a docstring, and that each argument has a description in the docstring, in the standard
    Google docstring format shown below. It also requires that all the function arguments have a valid Python type hint.

    Although it is not required, a `Returns` block can also be added, which will be included in the schema. This is
    optional because most chat templates ignore the return value of the function.

    Args:
        func: The function to generate a JSON schema for.

    Returns:
        A dictionary containing the JSON schema for the function.

    Examples:
    ```python
    >>> def multiply(x: float, y: float):
    >>>    '''
    >>>    A function that multiplies two numbers
    >>>
    >>>    Args:
    >>>        x: The first number to multiply
    >>>        y: The second number to multiply
    >>>    '''
    >>>    return x * y
    >>>
    >>> print(get_json_schema(multiply))
    {
        "name": "multiply",
        "description": "A function that multiplies two numbers",
        "parameters": {
            "type": "object",
            "properties": {
                "x": {"type": "number", "description": "The first number to multiply"},
                "y": {"type": "number", "description": "The second number to multiply"}
            },
            "required": ["x", "y"]
        }
    }
    ```

    The general use for these schemas is that they are used to generate tool descriptions for chat templates that
    support them, like so:

    ```python
    >>> from transformers import AutoTokenizer
    >>> from transformers.utils import get_json_schema
    >>>
    >>> def multiply(x: float, y: float):
    >>>    '''
    >>>    A function that multiplies two numbers
    >>>
    >>>    Args:
    >>>        x: The first number to multiply
    >>>        y: The second number to multiply
    >>>    return x * y
    >>>    '''
    >>>
    >>> multiply_schema = get_json_schema(multiply)
    >>> tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
    >>> messages = [{"role": "user", "content": "What is 179 x 4571?"}]
    >>> formatted_chat = tokenizer.apply_chat_template(
    >>>     messages,
    >>>     tools=[multiply_schema],
    >>>     chat_template="tool_use",
    >>>     return_dict=True,
    >>>     return_tensors="pt",
    >>>     add_generation_prompt=True
    >>> )
    >>> # The formatted chat can now be passed to model.generate()
    ```

    Each argument description can also have an optional `(choices: ...)` block at the end, such as
    `(choices: ["tea", "coffee"])`, which will be parsed into an `enum` field in the schema. Note that this will
    only be parsed correctly if it is at the end of the line:

    ```python
    >>> def drink_beverage(beverage: str):
    >>>    '''
    >>>    A function that drinks a beverage
    >>>
    >>>    Args:
    >>>        beverage: The beverage to drink (choices: ["tea", "coffee"])
    >>>    '''
    >>>    pass
    >>>
    >>> print(get_json_schema(drink_beverage))
    ```
    {
        'name': 'drink_beverage',
        'description': 'A function that drinks a beverage',
        'parameters': {
            'type': 'object',
            'properties': {
                'beverage': {
                    'type': 'string',
                    'enum': ['tea', 'coffee'],
                    'description': 'The beverage to drink'
                    }
                },
            'required': ['beverage']
        }
    }
    z Cannot generate JSON schema for z because it has no docstring!rS   r#   Nru   z< because the docstring has no description for the argument ''z\(choices:\s*(.*?)\)\s*$)flagsc                 S   s   g | ]}|  qS r   rd   )r7   cr   r   r   r9   T  r>   z#get_json_schema.<locals>.<listcomp>r   enum)r[   ru   rW   function)r$   r}   )rU   getdocr!   r   re   rx   ra   popr@   rh   rm   
IGNORECASEjsonloadsrp   r'   start)rR   docZmain_docZparam_descriptionsZ
return_docZjson_schemarP   argr`   ZdescZenum_choicesoutputr   r   r   get_json_schema   s4   
k

r   c           
      K   sx   g }g }| j ||% | jd||||d|D ]}|| qd|}	W d    |	|fS 1 s3w   Y  |	|fS )Nmessagestools	documentsadd_generation_promptrA   r   )environmentactivate_trackergenerater]   rq   )
compiled_templater   r   r   r   Ztemplate_kwargsrendered_blocksgeneration_indicesblockrendered_chatr   r   r   _render_with_assistant_indices^  s"   



r   c                 C   s   t  stdG dd dt}ttjtdk r#tdtj ddd }dd
d}dd }tdd|tjj	gd}||j
d< ||jd< ||jd< || S )Nzbapply_chat_template requires jinja2 to be installed. Please install it using `pip install jinja2`.c                       s   e Zd ZdhZdef fddZdejjdej	j
fddZejd	ej	jd
ejjdefddZdefddZedee dee fddZ  ZS )z1_compile_jinja_template.<locals>.AssistantTrackerZ
generationr   c                    s*   t  | |j| jd d | _d | _d S )N)r   )super__init__extendr   _rendered_blocks_generation_indices)selfr   	__class__r   r   r   {  s   
z:_compile_jinja_template.<locals>.AssistantTracker.__init__parserr#   c                 S   s:   t |jj}|jdgdd}tj| dg g ||S )Nzname:endgenerationT)Zdrop_needle_generation_support)	nextstreamlinenoZparse_statementsjinja2nodes	CallBlockZcall_methodZ
set_lineno)r   r   r   bodyr   r   r   parse  s   z7_compile_jinja_template.<locals>.AssistantTracker.parsecontextcallerc                 S   s>   | }|   rtd| j}|t| }| j||f |S )NrA   )	is_activerF   rq   r   r   r]   )r   r   r   rvstart_indexZ	end_indexr   r   r   r     s   zE_compile_jinja_template.<locals>.AssistantTracker._generation_supportc                 S   s   | j p| jS r4   )r   r   )r   r   r   r   r        z;_compile_jinja_template.<locals>.AssistantTracker.is_activer   r   c                 s   sF    z|   r
td|| _|| _d V  W d | _d | _d S d | _d | _w )Nz3AssistantTracker should not be reused before closed)r   
ValueErrorr   r   )r   r   r   r   r   r   r     s   
zB_compile_jinja_template.<locals>.AssistantTracker.activate_tracker)r   r   r   tagsr   r   r   r   Parserr   r   r   Zpass_eval_contextZEvalContextZruntimeZMacror.   r   r/   r   r   rI   r,   r   __classcell__r   r   r   r   AssistantTrackerw  s     	$r   z3.1.0zLapply_chat_template requires jinja2>=3.1.0 to be installed. Your version is .c                 S   s   t j| r4   )r   
exceptionsZTemplateError)messager   r   r   raise_exception  r   z0_compile_jinja_template.<locals>.raise_exceptionFc                 S   s   t j| ||||dS )N)ensure_asciiindent
separators	sort_keys)r   dumps)xr   r   r   r   r   r   r   tojson  s   z'_compile_jinja_template.<locals>.tojsonc                 S   s   t  | S r4   )r   nowstrftime)formatr   r   r   strftime_now  s   z-_compile_jinja_template.<locals>.strftime_nowT)Ztrim_blocksZlstrip_blocks
extensionsr   r   r   )FNNF)r   ImportErrorr   r   r   r   __version__r   extZloopcontrolsfiltersglobalsZfrom_string)chat_templater   r   r   r   Z	jinja_envr   r   r   _compile_jinja_templatep  s&   *




r   Fconversationsr   r   r   return_assistant_tokens_maskcontinue_final_messager   c              	   K   s  |rt d|std t|}|d ur7g }	|D ]}
t|
tr&|	|
 qt|
r2|	t	|
 qt
dnd }	|d urK|D ]}t|tsJtdq?g }g }| D ]}t|dr[|j}|rrtd|||	||d|\}}|| n|jd||	||d|}|r|d d	 }t|ttfrt|D ]}d
|v r|d
 } nqt
d| |vrt
d|| }|||t|   |kr|d |t|   }n|d |t|   }|| qQ||fS )Nz\{\%-?\s*generation\s*-?\%\}zareturn_assistant_tokens_mask==True but chat template does not contain `{% generation %}` keyword.zTools should either be a JSON schema, or a callable function with type hints and a docstring suitable for auto-conversion to a schema.zADocuments should be a list of dicts with 'title' and 'text' keys!r   )r   r   r   r   r   r   contenttextz]continue_final_message is set but we could not find any text to continuein the final message!a  continue_final_message is set but the final message does not appear in the chat after applying the chat template! This can happen if the chat template deletes portions of the final message. Please verify the chat template and final message in your chat to ensure they are compatible.r   )rh   rm   loggerZwarning_oncer   r:   rL   r]   r   r   r   	TypeErrorrD   r   r   renderrI   rJ   reversedre   rindexrF   lstrip)r   r   r   r   r   r   r   kwargsr   Ztool_schemasZtoolZdocumentrenderedZall_generation_indicesZchatr   r   Zfinal_messageZcontent_blockZfinal_msg_locr   r   r   render_jinja_template  s   




r   )NNNFFF)ArU   r   rh   rE   
contextlibr   r   	functoolsr   r   typingr   r   r   r	   r
   r   r   	packagingr   rA   r   Zimport_utilsr   r   r   Z
get_loggerr   r   r   Z
jinja2.extr   Zjinja2.sandboxr   Z	PIL.Imager   Ztorchr   r,   r-   r.   r/   r$   ZBASIC_TYPEScompileDOTALLrl   rn   VERBOSErs   ro   	Exceptionr   r!   rL   r1   r5   ra   rJ   rx   r   r   r   rI   r   r   r   r   r   <module>   s   $


A(! 

K	