
    `i9                    4    d Z ddlmZ ddlmZ ddddddZdS )zPNVRTC (NVIDIA Runtime Compilation) utilities for compiling CUDA source to CUBIN.    )annotations)Sequencez	kernel.cuN)namearch
extra_optssourcestrr   r   
str | Noner   Sequence[str] | Nonereturnbytesc                  	 ddl m}m} n"# t          $ r}t	          d          |d}~ww xY w|	 |                    d          \  }||j        j        k    rt	          d|           |                                \  }}||j        j        k    rd}|	                    |j
        j        |          \  }}	||j        j        k    rt	          d|           |	                    |j
        j        |          \  }}
||j        j        k    rt	          d|           d|	 |
 }n&# t          $ r}t	          d	| d
          |d}~ww xY w|                    t                              |           t                              |          ddd          \  }}||j        j        k    rt	          d|           d|                                z   dg}|r|                    d |D                        |                    |t+          |          |          \  }||j        j        k    r|                    |          \  }}||j        j        k    rR|dk    rLd|z  }|                    ||          \  }||j        j        k    rd|                    d           }nd| }nd| }|                    |           t	          |          |                    |          \  }}||j        j        k    r'|                    |           t	          d|           d|z  }|                    ||          \  }||j        j        k    r'|                    |           t	          d|           |                    |           |S )a  Compile CUDA source code to CUBIN using NVRTC.

    This function uses the NVIDIA Runtime Compilation (NVRTC) library to compile
    CUDA C++ source code into a CUBIN binary that can be loaded and executed
    using the CUDA Driver API.

    Parameters
    ----------
    source : str
        The CUDA C++ source code to compile.

    name : str, optional
        The name to use for the source file (for error messages). Default: "kernel.cu"

    arch : str, optional
        The target GPU architecture (e.g., "sm_75", "sm_80", "sm_89"). If not specified,
        attempts to auto-detect from the current GPU.

    extra_opts : Sequence[str], optional
        Additional compilation options to pass to NVRTC (e.g., ["-I/path/to/include", "-DDEFINE=1"]).

    Returns
    -------
    bytes
        The compiled CUBIN binary data.

    Raises
    ------
    RuntimeError
        If NVRTC compilation fails or CUDA bindings are not available.

    Example
    -------
    .. code-block:: python

        from tvm_ffi.cpp import nvrtc

        cuda_source = '''
        extern "C" __global__ void add_one(float* x, float* y, int n) {
            int idx = blockIdx.x * blockDim.x + threadIdx.x;
            if (idx < n) {
                y[idx] = x[idx] + 1.0f;
            }
        }
        '''

        cubin_bytes = nvrtc.nvrtc_compile(cuda_source)
        # Use cubin_bytes with tvm_ffi.cpp.load_inline and embed_cubin parameter

    r   )drivernvrtczBCUDA bindings not available. Install with: pip install cuda-pythonNz"Failed to initialize CUDA driver: z(Failed to get compute capability major: z(Failed to get compute capability minor: sm_z(Failed to auto-detect GPU architecture: z-. Please specify 'arch' parameter explicitly.z Failed to create NVRTC program: s   --gpu-architecture=s   -default-devicec                d    g | ]-}t          |t                    r|                                n|.S  )
isinstancer	   encode).0opts     e/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/tvm_ffi/cpp/nvrtc.py
<listcomp>z!nvrtc_compile.<locals>.<listcomp>   s3    YYYsZS%9%9BSZZ\\\sYYY        zNVRTC compilation failed:
zutf-8z-NVRTC compilation failed (couldn't get log): zNVRTC compilation failed: z%Failed to get CUBIN size from NVRTC: z Failed to get CUBIN from NVRTC: )cuda.bindingsr   r   ImportErrorRuntimeErrorcuInitCUresultCUDA_SUCCESScuCtxGetDevicecuDeviceGetAttributeCUdevice_attribute,CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR	ExceptionnvrtcCreateProgramr	   r   nvrtcResultNVRTC_SUCCESSextendnvrtcCompileProgramlennvrtcGetProgramLogSizenvrtcGetProgramLogdecodenvrtcDestroyProgramnvrtcGetCUBINSizenvrtcGetCUBIN)r   r   r   r   r   r   eresultdevicemajorminorprogopts
result_loglog_sizelog_buf	error_msg
cubin_size	cubin_bufs                      r   nvrtc_compilerA      s2   r/////////   P
 
	 |	a((IV555"#P#P#PQQQ $2244NFF555 #77)VX^ MFE 555"#Vf#V#VWWW"77)VX^ MFE 555"#Vf#V#VWWW''''DD 	 	 	>1 > > >  	 ++CJJv,>,>

4@P@PRSUY[_``LFD"000FfFFGGG 	.D  [YYjYYYZZZ ))$D		4@@IV"000$;;DAA
H*888X\\XoG!44T7CCMZU.<<<S'..:Q:QSS		TFTT		=V==I!!$'''9%%% 0066FJ"000!!$'''K6KKLLLz!I##D)44IV"000!!$'''FfFFGGG 
d###s(    
*%*C3D% %
E/EE)
r   r	   r   r	   r   r
   r   r   r   r   )__doc__
__future__r   typingr   rA   r   r   r   <module>rE      sm   " W V " " " " " "       '+S S S S S S S Sr   