
    `i$                        d dl Z d dlZd dlZd dlZd dlZd dlZd Z e            rdndZ ej        ee	          Z
g dZeD ]Z ee
e           e            e<   ddlmZmZ d	Zej        d
d
dej        ddfdZej        fdZee
j        _        ee_        d Z	 	 	 	 ddZ	 	 	 	 ddZee_        ee_         d Z!d Z" e            r e!             n
 e"             ddl#m#Z#m$Z$m%Z% ddl&m'Z' d dl(m)Z) de*de)fdZ+dS )    Nc                  @    t           j                            d          S )Nwin)sysplatform
startswith     b/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cudnn/__init__.py
is_windowsr   	   s    <""5)))r	   z.Release._compiled_modulez._compiled_module)package)backend_versionbackend_version_stringget_last_error_stringdestroy_handlenorm_forward_phasereduction_modebehavior_note	knob_typecreate_handlecreate_kernel_cachecreate_device_properties
get_streamnumerical_note
set_streambuild_plan_policy	data_typetensor_reordering	heur_modepygraphtensorknobcudnnGraphNotSupportedErrordiagonal_alignmentattention_implementation   )_library_type_is_torch_tensorz1.18.0F c
                 X    |                      ||t          |          ||||||		  	        S )ag  
    Create a tensor.

    Args:
        dim (List[int]): The dimensions of the tensor.
        stride (List[int]): The strides of the tensor.
        data_type (cudnn.data_type): The data type of the tensor.
        is_virtual (bool): Flag indicating if the tensor is virtual.
        is_pass_by_value (bool): Flag indicating if the tensor is passed by value.
        ragged_offset (cudnn_tensor): The ragged offset tensor.
        reordering_type (cudnn.tensor_reordering): The reordering type of the tensor.
        name (str): The name of the tensor.

    Returns:
        cudnn_tensor: The created tensor.
    )	dimstrider   
is_virtualis_pass_by_valueragged_offsetreordering_typenameuid)_make_tensorr&   )
selfr+   r,   r   r-   r.   r/   r0   r1   r2   s
             r
   _tensorr5   4   sD    8 	**)#'  
 
 
r	   c                 F    |                      t          |                    S N)_set_data_typer&   )r4   r   s     r
   r8   r8   ]   s      }Y77888r	   c                     t          |           t          u r| S t          |           r|                                 S t                              |           S r7   )typeintr'   data_ptr_pybind_module_get_data_ptr)input_tensors    r
   _library_device_pointerr@   h   sS    LS  	,	'	' :$$&&& ++L999r	   c                     d |                                 D             }t          |          }|                     |||           dS )aQ  
    Execute a cudnn graph.

    Args:
        tensor_to_device_buffer (dict(cudnn_tensor, Union[torch.Tensor, int, __dlpack__])): The dimensions of the tensor.
        workspace (Union[torch.Tensor, int, __dlpack__]): The name of the tensor.
        handle: cudnn_handle created with cudnn.create_handle()
    Returns:
        None
    c                     i | ]A\  }}|t          |          t          u r|n|                                t          |          BS r7   r:   r;   get_uidr@   .0xpointers      r
   
<dictcomp>z_execute.<locals>.<dictcomp>   d       S]STV]  EF  ERT!WW^^.Eg.N.N  ER  ER  ERr	   N)itemsr@   _execute)	r4   tensor_to_device_buffer	workspacehandleoverride_uidsoverride_shapesoverride_stridesuid_to_tensor_pointerworkspace_pointers	            r
   rL   rL   t   sc    & axa~a~  bA  bA   0	::MM'):FCCCCCr	   c           	          d |                                 D             }t          |          }	|                     ||	|||||           dS )a  
    Execute a cudnn graph.

    Args:
        tensor_to_device_buffer (dict(cudnn_tensor, Union[torch.Tensor, int, __dlpack__])): The dimensions of the tensor.
        workspace (Union[torch.Tensor, int, __dlpack__]): The name of the tensor.
        index(int): Location of execution plan to use.
        handle: cudnn_handle created with cudnn.create_handle()
    Returns:
        None
    c                     i | ]A\  }}|t          |          t          u r|n|                                t          |          BS r7   rC   rE   s      r
   rI   z*_execute_plan_at_index.<locals>.<dictcomp>   rJ   r	   N)rK   r@   _execute_plan_at_index)
r4   rM   rN   indexrO   rP   rQ   rR   rS   rT   s
             r
   rW   rW      s|    * axa~a~  bA  bA   0	::    r	   c                     t          j         t          j                            t	          j        d          d                    } | rTt          |           dk    sJ dt          |            d            t          j        	                    | d                   }nt          j        	                    d          }t          j
        |j        t          j                  j        }t                              |           d S )Npurelibznvidia/cudnn/bin/cudnn64_9.dllr%   Found z% libcudnn.dll.x in nvidia-cudnn-cuXX.r   zcudnn64_9.dll)globospathjoin	sysconfigget_pathlenctypeswindllLoadLibrarycast_handlec_void_pvaluer=   _set_dlhandle_cudnnlib_pathlibrO   s      r
   
load_cudnnrn      s    yi&8&C&CEeffggH 98}}!!!#`CMM#`#`#`!!!m''44m''88[fo66<F&&v.....r	   c                     t          j         t          j                            t	          j        d          d                    } | sDt          j         t          j                            t	          j        d          d                    } | rIt          |           dk    sJ dt          |            d            t          j        | d                   }nN	 t          j        d          }n8# t          $ r+ 	 t          j        d	          }n# t          $ r d }Y nw xY wY nw xY w|Et          j
        |j        t          j                  j        }t                              |           d S d S )
NrZ   z#nvidia/cudnn/lib/libcudnn.so.*[0-9]z'nvidia/cudnn_jit/lib/libcudnn.so.*[0-9]r%   r[   z$ libcudnn.so.x in nvidia-cudnn-cuXX.r   zlibcudnn.so.9zlibcudnn.so)r\   r]   r^   r_   r`   ra   rb   rc   CDLL	Exceptionrf   rg   rh   ri   r=   rj   rk   s      r
   _dlopen_cudnnrr      s\   yi&8&C&CEjkkllH u9RW\\)*<Y*G*GIrsstt 
8}}!!!#_CMM#_#_#_!!!k(1+&&	+o..CC 	 	 	k-00   	 S[&/::@**622222 s6   C, ,
D!7DD!DD!DD! D!)graphjitgraph_cache)Graph)Anyr1   returnc                 `   | dk    r.	 ddl m} |S # t          $ r}t          d|           |d }~ww xY w| dk    r.	 ddlm} |S # t          $ r}t          d|           |d }~ww xY w| dk    r.	 dd	lm} |S # t          $ r}t          d
|           |d }~ww xY w| dk    r.	 ddlm} |S # t          $ r}t          d|           |d }~ww xY w| dk    r.	 ddlm	} |S # t          $ r}t          d|           |d }~ww xY w| dk    r.	 ddl
m} |S # t          $ r}t          d|           |d }~ww xY w| dk    r.	 ddlm} |S # t          $ r}t          d|           |d }~ww xY w| dk    r.	 ddlm}	 |	S # t          $ r}t          d|           |d }~ww xY wt          |           )NNSAr%   )rz   z_NSA requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': GemmSwigluSm100)r{   zkGemmSwigluSm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': gemm_swiglu_wrapper_sm100)r|   zugemm_swiglu_wrapper_sm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': GemmAmaxSm100)r}   ziGemmAmaxSm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': gemm_amax_wrapper_sm100)r~   zsgemm_amax_wrapper_sm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': grouped_gemm)r   zhgrouped_gemm requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': GroupedGemmSwigluSm100)r   zrGroupedGemmSwigluSm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': !grouped_gemm_swiglu_wrapper_sm100)r   z}grouped_gemm_swiglu_wrapper_sm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': )native_sparse_attentionrz   rq   ImportErrorgemm_swiglur{   r|   	gemm_amaxr}   r~   r(   r   r   r   AttributeError)
r1   _NSAe_GemmSwigluSm100_gemm_swiglu_wrapper_sm100_GemmAmaxSm100_gemm_amax_wrapper_sm100_grouped_gemm_GroupedGemmSwigluSm100"_grouped_gemm_swiglu_wrapper_sm100s
             r
   __getattr__r      s<   u}}	L<<<<<<K 	L 	L 	L  D  AB  D  D  E  E  KL  L	L 
"	"	"	XHHHHHH## 	X 	X 	X  P  MN  P  P  Q  Q  WX  X	X 
,	,	,		      .- 	 	 	 L  IJ  L  L 	
 
	 	 	VBBBBBB!! 	V 	V 	V  N  KL  N  N  O  O  UV  V	V 
*	*	*	`      ,+ 	` 	` 	`  X  UV  X  X  Y  Y  _`  `	` 
			U777777   	U 	U 	U  M  JK  M  M  N  N  TU  U	U 
)	)	)	_WWWWWW** 	_ 	_ 	_  W  TU  W  W  X  X  ^_  _	_ 
4	4	4		      65 	 	 	 T  QR  T  T 	 T"""s    
2-2A 
A&A!!A&0A8 8
BBB$B, ,
C6C		CC   
D*C==DD 
D6D11D6 E 
E*E%%E*4E< <
FFF)NNNN),rc   r\   r]   r   r`   	importlibr   module_nameimport_module__name__r=   symbols_to_importsymbol_namegetattrglobals	datatypesr&   r'   __version__r   NOT_SETr   NONEr5   r8   r    set_data_typer   r@   rL   rW   executeexecute_plan_at_indexrn   rr   rs   rt   ru   wrapperrv   typingrw   strr   r   r	   r
   <module>r      si     				 



        * * * .8Z\\R))?R((hGGG   6 % B BK$W^[AAGGIIk 6 6 6 6 6 6 6 6 %*	
& & & &V 9 9 9 9 '5  #	: 	: 	:  D D D D@ " " " "J  6 / / /3 3 30 :<< JLLLLMOOO * * * * * * * * * *            M#c M#c M# M# M# M# M# M#r	   