
    `i,j              	          d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	m
Z
mZmZmZ ddlZddlZddlmZmZ 	 ddlZn# e$ r dZY nw xY wg dZeZda ej        e          Zdej        dd	d
ej        fdZdeeej        d	f         de	eej        f         de	eej        f         d
efdZdedej        de d
ed	         fdZ!ddej        ded
d	fdZ"dded         d
efdZ#d Z$ ej%        e$            G d d          Z&dS )a  Wrapper for cuDNN frontend to improve user experience.

This wrapper provides a more user-friendly interface for cuDNN frontend.
It allows users to create a graph, add operations to the graph, and then
compile the graph to a cuDNN plan. This wrapper is designed to avoid
boilerplate code.

Key Features:
    - Automatic graph validation and compilation
    - Simplified tensor management
    - Support for both named and positional tensor inputs
    - Automatic workspace management
    - PyTorch integration with DLPack support

Example:
    >>> x = torch.randn(8, 56, 56, 64, device=device, dtype=torch.float16).permute(0, 3, 1, 2)
    >>> w = torch.randn(32, 3, 3, 64, device=device, dtype=torch.float16).permute(0, 3, 1, 2)
    >>> with Graph() as graph:
    ...     y = graph.conv_fprop(
    ...         image=x, weight=w,
    ...         padding=[1,1], stride=[1,1], dilation=[1,1],
    ...         compute_data_type=data_type.FLOAT,
    ...         name="conv2d",
    ...     )
    ...     y.set_output(True).set_data_type(data_type.HALF)
    ...     # Graph is automatically validated and compiled on exit
    >>> graph.set_io_tuples(["conv2d::image", "conv2d::weight"], ["conv2d::Y"])
    >>> # Execute the graph
    >>> output = graph(x, w)
    )OrderedDictN)AnyDictListOptionalTupleUnion)	data_type	heur_mode)Graphr
   r   cudnngraphtensortorch.Tensorreturnc                     t          |d          r.|j        r'|                     |                                          S |                     |          S )a  Create a tensor in the graph object.

    Args:
        graph: The cuDNN graph object to create the tensor in
        tensor: The dlpack tensor to create a graph tensor from

    Returns:
        A cuDNN tensor object representing the input tensor in the graph

    Note:
        If the input tensor has requires_grad=True, it will be detached
        before creating the graph tensor to avoid gradient tracking issues.
    requires_grad)hasattrr   tensor_likedetach)r   r   s     a/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cudnn/wrapper.py_graph_tensorr   :   sP     v'' )F,@ )  111  (((    
tensor_map
dlpack_mapc                    t          | t                    r=| |v r| S |                                D ]!\  }}|                                | k    r|c S "nt          | t                    r7|                                D ]!\  }}|                                | k    r|c S "nt          | t          j                  r#|                                D ]\  }}| |u r|c S not          | d          r_t          |t                    rJt          |           |v r9|t          |                    } |                                D ]\  }}|| k    r|c S t          d          )a7  Find the mapping name for a tensor used in a graph.

    This function searches for a tensor in the tensor map and returns its
    corresponding name. The tensor can be specified in multiple ways:
    - As a string (either the assigned tensor name or the node::input_name)
    - As a cuDNN tensor object
    - As a DLPack-compatible tensor (e.g., PyTorch tensor) that was used in creating the graph

    Args:
        tensor: The tensor to find, can be a string name, cuDNN tensor, or DLPack tensor
        tensor_map: Dictionary mapping tensor names to cuDNN tensor objects
        dlpack_map: Dictionary mapping DLPack tensor IDs to cuDNN tensor objects

    Returns:
        The key in tensor_map that the provided tensor is mapped to

    Raises:
        ValueError: If the tensor cannot be found in the tensor map
    
__dlpack__zInput not found in tensor map)
isinstancestritemsget_nameintget_uidr   r   r   dictid
ValueError)r   r   r   tensor_nametensor_values        r   _find_tensorr)   O   s   0 &# #ZM)3)9)9);); 	# 	#%K$$&&&00"""" 1	# 
FC	 	  #)3)9)9);); 	# 	#%K##%%//"""" 0	# 
FEL	)	) #)3)9)9);); 	# 	#%K%%"""" &	# 
	&	& #:j$+G+G #BvJJZdLdLdBvJJ')3)9)9);); 	# 	#%Kv%%"""" &
4
5
55r   namearg_dictc                     | |v r||          S ||v r||         S 	 ||                                          S # t          $ r Y nw xY w	 ||                                         S # t          $ r Y dS w xY w)a3  Extract a dlpack tensor from the arg_dict that matches the provided name or cudnn tensor

    Args:
        name: The name of the tensor to extract
        tensor: The cudnn tensor to extract
        arg_dict: The dictionary of arguments to extract the tensor from

    Returns:
        A dlpack tensor
    N)r!   KeyErrorr#   )r*   r   r+   s      r   _extract_tensorr.      s     x~))**   (())   tts!   4 
A AA 
A-,A-pytcudnn_tensortensor_typec                    |dk    rt          d          t          j                                        st	          d          t          j                            |                                           }|$t          d|                                            t          j	        | 
                                d|          }t          j        || 
                                |                                           }|S )a0  Create a tensor like the provided cudnn tensor

    Args:
        cudnn_tensor: The cuDNN tensor to create a dlpack tensor from
        tensor_type: The type of tensor to create, currently only "pyt" is supported

    Returns:
        A dlpack tensor allocated that is like the provided cuDNN tensor
    r/   z(Only PyTorch tensor is supported for nowPyTorch is not availableNz0cuDNN uses an unsupported data type in PyTorch: cudadevicedtype)NotImplementedErrorr   	datatypesis_torch_availableRuntimeError_cudnn_to_torch_data_typeget_data_type	TypeErrortorchemptyget_dim
as_strided
get_stride)r0   r1   r7   r   s       r   _tensor_likerD      s     e!"LMMM?--// 75666O55l6P6P6R6RSSE}i<KeKeKgKgiijjj[--//eLLLFfl&:&:&<&<l>U>U>W>WXXFMr   streamztorch.cuda.Streamc                     t           t          d          t          t          j                    a| #t           j                                        j        } t          j        t          |            t          S )z>Get the default cuDNN handle and set to torch's current streamNr3   )handlerE   )	r?   r;   _default_cudnn_handler   create_handler4   current_streamcuda_stream
set_stream)rE   s    r   get_default_handlerM      se     }5666$ % 3 5 5~**,,8	1&AAAA  r   c                  J    t           t          j        t                      d S d S N)rH   r   destroy_handle r   r   destroy_default_handlerR      s(    (233333 )(r   c                      e Zd ZU dZdZee         ed<   dddddddee         deee	e
dej        f                           d	eee	e
dej        f                           d
eee                  deddfdZd Zd Zd Zde
defdZd Zde	ded         f         fdZdee
df         dee
df         fdZdee	e
dej        f                  d	ee	e
dej        f                  ddfdZdS )r   a  Wrapper object for cuDNN computation graph

    This class simplifies the process of creating, compiling, and executing
    cuDNN computation graphs. It handles common boilerplate code and provides
    a more Pythonic interface to the cuDNN frontend API.

    Key features:
    - Automatic graph validation and compilation
    - Simplified tensor management with PyTorch integration
    - Support for both named and positional tensor inputs
    - Automatic workspace management

    Note:
        The graph is automatically validated and compiled when exiting the
        context manager. Any errors in graph construction will be raised
        at that point.
    N_Graph__handleT)rG   inputsoutputs
heuristicsworkspace_allocrG   rU   r   rV   rW   rX   r   c                X   t           t          d          |r+t          |t          t          f          st          d          |r+t          |t          t          f          st          d          |r+t          |t          t          f          st          d          t          j                    dk     rt          d          || _        d | _	        i | _
        t                      | _        t                      | _        g | _        i | _        t!                      | _        d | _        d | _        |pg | _        |pg | _        |pt,          j        t,          j        g| _        |sd| _        |r|| _        dD ]6}||v r0t          j                            ||                   p||         ||<   7d S )	Nr3   inputs must be a list or tupleoutputs must be a list or tuplez"heuristics must be a list or tuplei@d z*cuDNN version 9.12.0 or higher is requiredF)io_data_typeintermediate_data_typecompute_data_type)r?   r;   r   listtupler&   r   backend_version_Graph__kwargs_Graph__graph_Graph__tensor_mapr   _Graph__tensor_in_Graph__tensor_out_Graph__tensor_unknown_Graph__node_countset_Graph__node_names_Graph__input_tuples_Graph__output_tuples_Graph__inputs_Graph__outputsr   AFALLBACK_Graph__heuristics_Graph__workspacerT   r9   _torch_to_cudnn_data_type)selfrG   rU   rV   rW   rX   kwargskeys           r   __init__zGraph.__init__   s    =9::: 	?*VdE];; 	?=>>> 	@:ge}== 	@>??? 	CjdE]CC 	CABBB ""U**KLLL&=='MM "EE"#" B&K9;	8J*K 	%$D 	#"DMR 	d 	dCf}}#oGGsTTcX^_bXcs	d 	dr   c                     d S rO   rQ   rt   s    r   __del__zGraph.__del__  s    r   c                     | j         t          d          t          j        di | j        dvr	d| j        ini | j        | _         | S )NzGraph already created)autoNrG   rQ   )rc   r;   r   pygraphrT   rb   ry   s    r   	__enter__zGraph.__enter__  se    <#6777} 
 
,0M,O,O$-((UW
 m
 

 r   c                    |#t                               d|           d| _         | j                                         | j                                         | j                            | j                   | j                                         | j                                         t          | d          s=t          j        | j                                        dt          j                  | _        | j        s| j        r |                     | j        | j                   | `| `t                               d| j                   t                               d| j                   t                               d| j                   | j        S )	a%  Exit the context manager, validating and compiling the graph.

        This method performs the following steps:
        1. Validates the graph structure
        2. Builds the operation graph
        3. Creates execution plans
        4. Checks hardware support
        5. Builds the final plans
        6. Allocates workspace memory

        Raises:
            ValidationError: If graph validation fails
            GraphStateError: If graph operations are performed in invalid order
            CudnnError: For other cuDNN-related errors
        Nz'Exception during graph construction: %s__workspacer4   r5   z
Inputs: %szOutputs: %szNode count: %s)loggererrorrc   validatebuild_operation_graphcreate_execution_plansrq   check_supportbuild_plansr   r?   r@   get_workspace_sizeuint8rr   rm   rn   set_io_tuplesdebugre   rf   rh   )rt   exc_type	exc_valuetbs       r   __exit__zGraph.__exit__  s\   " LLBINNNDL**,,,++D,=>>>""$$$  """t]++ 	${//11k     D
 = 	>DN 	>t}dn===M4>\4#3444]D$5666%t'8999|r   r*   c                      t           j                  g d}|v rS g d}|v rt          d d          t          j                  sS  fd}dv r|S  fd}|S )N)r    get_workspace_size_plan_at_index	serializedeserializequery_tensor_attributes_of_uid)buildr   build_plan_at_indexr   r   create_execution_planr   deselect_behavior_notesdeselect_enginesdeselect_numeric_notesdeselect_workspace_greater_thanexecuteexecute_plan_at_indexget_behavior_notes$get_behavior_notes_for_plan_at_indexget_engine_countget_execution_plan_countget_knobs_for_engineget_plan_name_at_indexrv   populate_cuda_graphr   select_behavior_notesselect_numeric_notesupdate_cuda_graphr   zCalling z via wrapper is not allowedc                  L     | i |}j                             |           |S rO   )rg   append)argsru   outputattrrt   s      r   tensor_capturez)Graph.__getattr__.<locals>.tensor_capturem  s3    T4*6**F!((000Mr   )r   r   c                     t          |           } j        vr
dj        <   j        xx         dz  cc<   d|v r	|d         }n dj                 dz
   }||d<   |j        v rt          d| d          j                            |           t          |           D ]\  }}t          |d          rGt          |          }|j        vrt          j
        |          j        |<   j        |         x}| |<   t          |t          j                  r|j        | d| <   |                                D ]\  }}t          |d          rGt          |          }|j        vrt          j
        |          j        |<   j        |         x}||<   t          |t          j                  r|j        | d| <    
| i |}t          |t          j                  r|g}nt          |t           t           f          r|}t          |          D ]i\  }}t          |t          j                  rJt          |d	          r)|                                r|                                }	n| d| }	|j        |	<   j|S )
Nr      r*   .z
Node name z already usedr   z::r!   )r_   rh   rj   r&   add	enumerater   r%   rd   r   rc   r   r   r   re   r    r`   r!   rf   )r   ru   	node_nameiobjobj_idrv   r   output_listr'   r   r*   rt   s             r   wrapperz"Graph.__getattr__.<locals>.wrapperv  s   ::D4,,,*+!$'d###q(###"6N		#AAd&7&=a&?AA	!*vD--- !Fi!F!F!FGGG!!),,,#D// @ @33-- >WWFT%6664A$,PS4T4T)&1$($5f$==C$q'c5<00 @<?D$	%8%8Q%8%89"LLNN B BS3-- BWWFT%6664A$,PS4T4T)&1(,(9&(AAC&+c5<00 B>AD$	%:%:S%:%:;T4*6**F&%,// %%hFT5M22 %$#K00 9 93c5<00 9sJ// :CLLNN :&)llnn)2&9&9a&9&958D%k2Mr   )getattrrc   r;   inspectismethod)rt   r*   pass_throughblocked_methodsr   r   r   s   ``    @r   __getattr__zGraph.__getattr__;  s    t|T**
 
 
 <K
 
 
: ?""K$KKKLLL%% 	K	 	 	 	 	 	
 ,,,!!-	 -	 -	 -	 -	 -	 -	^ r   c                 "   | j         t          d          | j                                         st          d          t          |          dk    r/t	          |d         t
                    r | j        |d         fi |S t          |          dk    r| j        st          d          t          |          t          | j                  k    r5t          dt          |           dt          | j                   d	           | j	        |i |S )
z"Execute the graph with tensor dictNzGraph not createdz8You should not invoke the graph before the context exitsr   r   zXYou should not invoke the graph with positional arguments before running set_io_tuples()zNumber of arguments (z#) does not match number of inputs ())
rc   r;   r   lenr   r$   _Graph__call_with_tensor_dictrk   r&   !_Graph__call_with_positional_args)rt   r   ru   s      r   __call__zGraph.__call__  s(   <2333|4466 	[YZZZt99>>ja$77>/4/QBB6BBB4yy1}}T%8} !{|||4yyC 34444   "CT  "C  "Cgjkok~gg  "C  "C  "C  D  D  D343TDVDDDr   )r   .c                 >   i }t          | j        |          D ]\  }}|||                                <   d | j        D             }t          | j        |          D ]\  }}|||                                <   t	          |          }d|vr>| j        dk    rt                      |d<   n!| j        | j        |d<   nt          d          d|vr"| j        du rt          d          | j        |d<    | j	        j
        |fi | t          |          d	k    r|d
         S |S )a?  Execute the graph with positional arguments.

        Args:
            *args: Positional arguments to pass to the graph
            **kwargs: Additional keyword arguments to pass to the graph execution

        Returns:
            A single tensor or a tuple of tensors

        Note:
            This method is called by __call__() when the graph is executed with positional arguments.
            It is not intended to be called directly by the user. The `args` should be a list of dlpack tensors
            that matches the input order of `self.__input_tuples`.
        c                 .    g | ]}t          |d           S r/   )rD   ).0r0   s     r   
<listcomp>z5Graph.__call_with_positional_args.<locals>.<listcomp>  s"    cccl\599cccr   rG   r|   N-Need to specify cudnn handle to execute graph	workspaceF*Need to specify workspace to execute graphr   r   )ziprk   r#   rl   r$   rT   rM   r;   rr   rc   r   r   )rt   r   ru   variant_packr0   user_tensoroutput_tuples          r   __call_with_positional_argsz!Graph.__call_with_positional_args  sd   " ),T-@$)G)G 	? 	?%L+3>L--//00ccdNbccc),T-A<)P)P 	? 	?%L+3>L--//00f6!!}&&#5#7#7x  *#'=x  "#RSSSf$$5(("#OPPP&*&6{#\44V444|!!?"r   tensor_dictc                 :   	 i }i }t          j        | j                                        | j                                                  D ]\  }}|                                |v s|                                r0t          |||          }||||<   It          |d          st          d| d          |||                                <   g }g }	|                                D ]z\  }}|                                |v r|| j        v r@t          |d          x||                                <   ||<   |	                    |           e|                    |           {|rt          d|           |	rt                              d|	           t          |          }d|vr>| j        d	k    rt!                      |d<   n!| j        | j        |d<   nt          d
          d|vr"| j        du rt          d          | j        |d<    | j        j        |fi | |S )a  Execute the graph with a dictionary of tensors.

        Args:
            tensor_dict: Dictionary of tensor names to tensors
            **kwargs: Additional keyword arguments to pass to the graph execution

        Returns:
            Dictionary of tensor names to tensors

        Raises:
            RuntimeError: If a non-virtual tensor in the graph is not found in
            `tensor_dict`, or the tensor in `tensor_dict` is not a dlpack tensor
        Nr   zTensor z# is not provided as a dlpack tensorr/   z5Non-virtual input tensors not found in variant pack: zAdded output tensors: %srG   r|   r   r   Fr   )	itertoolschainre   r    rf   r#   get_is_virtualr.   r   r;   rD   r   r   r   r$   rT   rM   rr   rc   r   )
rt   r   ru   r   missing_tensorsr*   r   r   missing_inputsmissing_outputss
             r   __call_with_tensor_dictzGraph.__call_with_tensor_dict  s}   &	 %OD,<,B,B,D,DdFWF]F]F_F_`` 		9 		9LD&~~<//63H3H3J3J/)$DDK"*.';55 X"#VT#V#V#VWWW-8L))**+1133 		, 		,LFD~~<//t(((EQRXZ_E`E``V^^--.T1B&&t,,,, %%d++++ 	igWegghhh 	FLL3_EEEf6!!}&&#5#7#7x  *#'=x  "#RSSSf$$5(("#OPPP&*&6{#\44V444r   c           	         t          |t          t          f          st          d          t          |t          t          f          st          d          | j                                        s|| _        || _        dS g }t                      }t          |          D ]\  }}	 || j
        v r|                    |           %t          || j        | j                  }| j        |         }t          |          |v rt          d| d| d          |                    t          |                     |                    |           # t          $ r t          d| d| d          dw xY wg }t          |          D ]\  }}	 || j
        v r|                    |           %t          || j        | j                  }| j        |         }t          |          |v rt          d| d| d          |                    t          |                     |                    |           # t          $ r t          d| d| d          dw xY wt          |          D ],\  }}|                                rt          d| d	          -| j                                        D ]0\  }}|                                s||vrt          d
| d          1| j                                        D ]=\  }}|                                s$t          |          |vrt          d| d          >t          |          | _        t          |          | _        dS )a4  Set order of input and output tensors to allow graph to be executed with positional arguments.

        Args:
            inputs: List of input tensors or names
            outputs: List of output tensors or names

        Raises:
            ValueError: If inputs or outputs are not lists or tuples
        rZ   r[   NzInput at index z (z) is a duplicatez) not found in tensor mapzOutput at index z is a virtual tensorzNode output z4 is a non-virtual tensor but not specified as outputzNode input z= is a non-virtual tensor but not specified as input or output)r   r_   r`   r&   rc   r   rm   rn   ri   r   rg   r   r)   re   rd   r%   r   rf   r   r    rk   rl   )	rt   rU   rV   input_tensorstensors_foundr   r*   r   output_tensorss	            r   r   zGraph.set_io_tuples+  s    &4-00 	?=>>>'D%=11 	@>???|4466 	"DM$DNF  (( 	c 	cGAtc4000!((...#D$*:D<MNN)$/f::..$%Rq%R%RD%R%R%RSSS!!"V**---$$V,,,, c c c !W1!W!W!W!W!WXX^bbc  )) 	d 	dGAtd4000"))$///#D$*;T=NOO*40f::..$%S%S%ST%S%S%STTT!!"V**---%%f---- d d d !XA!X!X!X!X!XYY_ccd #=11 	L 	LIAv$$&& L !J1!J!J!JKKKL !-3355 	l 	lLD&((** lv^/K/K !j!j!j!jkkk ,2244 	t 	tLD&((** tr&zz/N/N !rt!r!r!rsss#M22$^44s&   %EBE"E-H-&BH--"I)__name__
__module____qualname____doc__rT   r   CudnnHandle__annotations__r   r	   r   r   r   r   boolrw   rz   r~   r   r   r   r   r   r   r   r   r   rQ   r   r   r   r      s#         $ '+Hh{#***
 )-KOLP04 $(d (d (d %(d eC$EFGH	(d
 $uS.%,%FGHI(d T)_-(d (d 
(d (d (d (dT    * * *Xj j j j j jXE E E+ eNTYZmTnDn>o +  +  +  + ZF#~-.F 
c>!	"	F F F FPG5U3<=>G5 eC=>?G5 
	G5 G5 G5 G5 G5 G5r   r   r   rO   )'r   collectionsr   atexitr   r   loggingtypingr   r   r   r   r   r	   r   cudnn.datatypesr
   r   r?   ImportError__all__r"   r   rH   	getLoggerr   r   r}   r   r   r   r)   r$   r.   rD   rM   rR   registerr   rQ   r   r   <module>r      s   > $ # # # # #        : : : : : : : : : : : : : : : :      & & & & & & & &LLLL   EEE 7
6
6   
	8	$	$) ) )5< ) ) ) )*-6#u|^34-6S%,&'-6 S%,&'-6 		-6 -6 -6 -6`# u| t Q_H`    2 u| # .    ,
! 
!x(;< 
! 
! 
! 
! 
!4 4 4
 & ' ' 'o5 o5 o5 o5 o5 o5 o5 o5 o5 o5s   ? A	A	