
    `i                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ  G d d	          Z G d
 d          ZddddZ e
j        de	j                  Z e
j        de	j                  Z e
j        de	j                  Z e
j        de	j                  Z e
j        de	j                  Zde_        dS )    N)core)_compile)_cuda_typerules)_cuda_types)_internal_types)Scalarc                   (    e Zd ZdZddZd ZddZdS )	_CudaFunctionzJIT cupy function object
    Fc                    g | _         |r| j                             d           n| j                             d           |r| j                             d           t          |d|j                  | _        || _        || _        d S )N
__device__
__global__inlinename)
attributesappendgetattr__name__r   funcmode)selfr   r   devicer   s        h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cupyx/jit/_interface.py__init__z_CudaFunction.__init__   s     	1O""<0000O""<000 	-O""8,,,D&$-88					    c                     t           N)NotImplementedError)r   argskwargss      r   __call__z_CudaFunction.__call__"   s    !!r   Nc                 P    t          j        | j        | j        | j        ||          S r   )r   	transpiler   r   r   )r   in_typesret_types      r   _emit_code_from_typesz#_CudaFunction._emit_code_from_types%   s+    !It	8XG G 	Gr   )FFr   )r   
__module____qualname____doc__r   r    r%    r   r   r
   r
      s[            " " "G G G G G Gr   r
   c                   T    e Zd ZdZd Z	 d	dZd Zed             Zed             Z	dS )
_JitRawKernelzJIT CUDA kernel object.

    The decorator :func:``cupyx.jit.rawkernel`` converts the target function
    to an object of this class. This class is not intended to be instantiated
    by users.
    c                 L    || _         || _        || _        i | _        i | _        d S r   )_func_mode_device_cache_cached_codes)r   r   r   r   s       r   r   z_JitRawKernel.__init__2   s,    

r   r   Nc                    g }|D ]}t          |t          j                  r t          j                            |          }nNt          j        |          rt          j	        | j
        |          }nt          t          |           d          |                    |           t          |          }t          j                                        }	| j                            ||	fd          \  }
}|
| j                            |          }|8t)          j        | j        ddg| j
        |t          j                  }|| j        |<   |j        }|j        }|j        }|j        }|dk    r|dz  }|j        }t;          j        |j        |||          }|                     |          }
|
|f| j        ||	f<   g }tC          ||          D ]n\  }}t          |tD                    r?|j#        j$        d	k    rt          j%        |          }n|j#                            |          }|                    |           o |
||t          |          |||           dS )
a5  Calls the CUDA kernel.

        The compilation will be deferred until the first function call.
        CuPy's JIT compiler infers the types of arguments at the call
        time, and will cache the compiled kernels for speeding up any
        subsequent calls.

        Args:
            grid (tuple of int): Size of grid in blocks.
            block (tuple of int): Dimensions of each thread block.
            args (tuple):
                Arguments of the kernel. The type of all elements must be
                ``bool``, ``int``, ``float``, ``complex``, NumPy scalar or
                ``cupy.ndarray``.
            shared_mem (int):
                Dynamic shared-memory size per thread block in bytes.
            stream (cupy.cuda.Stream): CUDA stream.

        .. seealso:: :ref:`jit_kernel_definition`
        z is not supported for RawKernel)NNNz
extern "C"r   nvcc)z-DCUPY_JIT_NVCC)sourceoptionsbackendjitifye)&
isinstancecupyndarrayr   CArrayfrom_ndarraynumpyisscalarr   get_ctype_from_scalarr.   	TypeErrortyper   tuplecudaget_device_idr0   getr1   r   r"   r-   void	func_nameenable_cooperative_groupsr5   r6   r7   r   compile_with_cachecodeget_functionzipr   dtypecharfloat32)r   gridblockr   
shared_memstreamr#   xt	device_idkern	enable_cgresultfnamer5   r6   r7   modulenew_argsas                       r   r    z_JitRawKernel.__call__9   sc   ,  	 	A!T\** M&33A66"" M#9$*aHH477 K K KLLLOOA??I++--	+//8Y*?NNi<'++H55F~!+J!<0J$  06"8,$E8InGnG&  //]F,{	  F
 &&u--D26	1BDK9-.h'' 	 	DAq!V$$ (7<3&&a((AAQAOOAT5%//:vyIIIIIr   c                      |\  t          t                    sddft          t                    sddf fdS )zTNumba-style kernel call.

        .. seealso:: :ref:`jit_kernel_definition`
           c                       | fi |S r   r)   )r   r   rR   rQ   r   s     r   <lambda>z+_JitRawKernel.__getitem__.<locals>.<lambda>   s    ttD%'H'H'H'H r   )r9   rC   )r   grid_and_blockrR   rQ   s   ` @@r   __getitem__z_JitRawKernel.__getitem__   s_    
 %e$&& 	 !Q<D%'' 	"AqMEHHHHHHHr   c                     t          | j                  dk    rt          j        d           t	          d | j                                        D                       S )zReturns a dict that has input types as keys and codes values.

        This property method is for debugging purpose.
        The return value is not guaranteed to keep backward compatibility.
        r   zRNo codes are cached because compilation is deferred until the first function call.c                 &    g | ]\  }}||j         fS r)   )rK   ).0kvs      r   
<listcomp>z._JitRawKernel.cached_codes.<locals>.<listcomp>   s"    HHHTQa[HHHr   )lenr1   warningswarndictitems)r   s    r   cached_codesz_JitRawKernel.cached_codes   sa     t!""a''M+, , , HHT-?-E-E-G-GHHHIIIr   c                     | j         }t          |          dk    rt          j        d           t	          t          |                                                    S )zReturns `next(iter(self.cached_codes.values()))`.

        This property method is for debugging purpose.
        The return value is not guaranteed to keep backward compatibility.
        r`   zXThe input types of the kernel could not be inferred. Please use `.cached_codes` instead.)rp   rk   rl   rm   nextitervalues)r   codess     r   cached_codez_JitRawKernel.cached_code   sT     !u::>>M67 7 7 D(()))r   )r   N)
r   r&   r'   r(   r   r    rd   propertyrp   rv   r)   r   r   r+   r+   *   s               ;?GJ GJ GJ GJR
I 
I 
I 
J 
J X
J * * X* * *r   r+   rD   F)r   r   c                 T     t           j                            d            fd}|S )z=A decorator compiles a Python function into CUDA kernel.
    zcupyx.jit.rawkernelc                 L    t          j        t          |           |           S r   )	functoolsupdate_wrapperr+   )r   r   r   s    r   wrapperzrawkernel.<locals>.wrapper   s)    '$f--t5 5 	5r   )r:   _utilexperimental)r   r   r|   s   `` r   	rawkernelr      sB     	J12225 5 5 5 5 5 Nr   	threadIdxblockDimblockIdxgridDimwarpSizezRReturns the number of threads in a warp.

.. seealso:: :obj:`numba.cuda.warpsize`
)rz   rl   r>   r:   
cupy._corer   	cupyx.jitr   r   r   r   cupyx.jit._cuda_typesr   r
   r+   r   Datadim3r   r   r   r   int32warpsizer(   r)   r   r   <module>r      s                      % % % % % % ! ! ! ! ! ! % % % % % % ( ( ( ( ( (G G G G G G G G6}* }* }* }* }* }* }* }*@ U      !O k.>??	?
K,<==?
K,<==
/
y+*:
;
;?
K,=>>   r   