
    `iD                       d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ej        dk    Z	 dPdQdZdRd!Zej        dSd"            Z dSd#Z!dTd+Z"	 dPdUd.Z#dVd0Z$e%&                    d1d2d3d4d5d6          Z'dWd8Z(dXd;Z)dYd>Z*	 	 dZd[dBZ+ddddddddddC	d\dGZ,dddddddddd?dH
d]dKZ-ddddddddLd^dMZ.dddddddd?dNd_dOZ/dS )`zBBuild and load C++/CUDA sources into a tvm_ffi Module using Ninja.    )annotationsN)MappingSequence)nullcontext)Path)Any)find_dlpack_include_pathfind_include_pathfind_libtvm_ffi)Moduleload_module)FileLockwin32
cpp_source
str | Nonecuda_source	cpp_filesSequence[str] | None
cuda_files	functions!Sequence[str] | Mapping[str, str]extra_cflagsSequence[str]extra_cuda_cflagsextra_ldflagsextra_include_pathsembed_cubinMapping[str, bytes] | Nonereturnstrc
                   
 t          j                    d
fd
 
| ||t          |          nd|t          |          nd||||||	f
                                           dd         S )	z;Generate a unique hash for the given sources and functions.objr   r   Nonec                   |                      d           d S t          | t                    r?                     d                                |                     d                     d S t          | t                    r,                     d                                |            d S t          | t
                    rY                     d           t          |                                           D ] }| |         } |            |           !d S t          | t                    r'                     d           | D ]} |           d S t          dt          |                      )Ns   Nones   strutf-8s   bytess   Mappings   SequencezUnsupported type: )update
isinstancer    encodebytesr   sortedkeysr   
ValueErrortype)r"   keyitem_hashms      i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/tvm_ffi/cpp/extension.pyr0   z_hash_sources.<locals>._hash6   s~   ;HHWS!! 	?HHVHHSZZ(()))))U## 	?HHXHHSMMMMMW%% 	?HHZ   chhjj))  3xc


d  X&& 	?HH[!!!  d  =$s))==>>>    N   )r"   r   r   r#   )hashlibsha256r*   	hexdigest)r   r   r   r   r   r   r   r   r   r   r0   r1   s             @@r2   _hash_sourcesr8   '   s     	A? ? ? ? ? ? ?, 
E!*!6F9D","8F:d	
   ;;=="r3   pathcontentr#   c                b   t          |           }|                                rH|                                5 }|                                }ddd           n# 1 swxY w Y   ||k    rdS |                    d          5 }|                    |           ddd           dS # 1 swxY w Y   dS )zIWrite content to path if it does not already exist with the same content.Nw)r   existsopenreadwrite)r9   r:   pfexisting_contents        r2   _maybe_writerD   ^   s   T

Axxzz VVXX 	( vvxx	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(w&&F	
 	                 s#   AA AB$$B(+B(c                 D   t           j                            d          pt           j                            d          } | t          j        d          }|'t          t          |          j        j                  } nt          rit          d          }t          |
                    d                    }t          |          dk    rt          d          t          |d                   } nd	} t          |                                           st          d          | S )
zFind the CUDA install path.	CUDA_HOME	CUDA_PATHNnvccz2C:/Program Files/NVIDIA GPU Computing Toolkit/CUDAzv*.*r   zLCould not find CUDA installation. Please set CUDA_HOME environment variable.z/usr/local/cuda)osenvirongetshutilwhichr    r   parent
IS_WINDOWSlistgloblenRuntimeErrorr=   )	cuda_home	nvcc_path	cuda_root
cuda_homess       r2   _find_cuda_homerX   j   s    
{++Jrz~~k/J/JIL((	 DOO29::II  	. !UVV	!).."8"899
z??a''&f    
1..		-		??))++ "b   r3   c            	        dt           j        v rt           j        d                                         } g }| D ]q}t          |                    d                    dk    rt	          d|           |                    d          \  }}|                    d| | d| |            rd                    |          S 	 t          j        g dd	d	
          }|j	        
                    d                                                              d          d         }|                    d          \  }}d| | d| | S # t          $ r Y dS w xY w)z&Get the CUDA target architecture flag.TVM_FFI_CUDA_ARCH_LIST.   zInvalid CUDA architecture: z-gencode=arch=compute_z	,code=sm_ )z
nvidia-smiz--query-gpu=compute_capz--format=csv,noheaderT)argscapture_outputcheckr%   
r   z#-gencode=arch=compute_70,code=sm_70)rI   rJ   splitrR   r,   appendjoin
subprocessrunstdoutdecodestrip	Exception)	arch_listflagsarchmajorminorstatuscompute_caps          r2   _get_cuda_targetrr      s   2:--J78>>@@	 	Y 	YD4::c??##q(( !Et!E!EFFF::c??LE5LLW%WWWWPUWWXXXXxx	9^WWW#  F
 !-..w77==??EEdKKANK&,,S11LE5QEQ5QQ5Q%QQQ 	9 	9 	9888	9s   ?BE 
EEr^   	list[str]cwdstr | os.PathLike[str]r_   boolsubprocess.CompletedProcessc           	     V   	 t          t          t          j                            dd                    dz  dz  dz            }t          |                                          st          d          t          j        |ddd	d
ddgddd          j	        
                                }|st          d          t          t          |          dz  dz  dz            }t          |                                          st          d|           d                    |d                    |                     }t          j        |d||d          S # t          t          j        f$ r;}t          d                    d                    |                               |d}~ww xY w)zOLocates the Developer Command Prompt and runs a command within its environment.zProgramFiles(x86)zC:\Program Files (x86)zMicrosoft Visual Studio	Installerzvswhere.exezvswhere.exe not found.z-latestz-prereleasez	-products*z	-propertyinstallationPathT)r_   textr`   z$No Visual Studio installation found.Common7ToolszVsDevCmd.batzVsDevCmd.bat not found at: z'"{vsdevcmd_path}" -arch=x64 & {command}r]   )vsdevcmd_pathcommandF)r`   rt   r_   shellzEFailed to run the following command in MSVC developer environment: {}N)r    r   rI   rJ   rK   r=   FileNotFoundErrorre   rf   rg   ri   formatrd   CalledProcessErrorrS   )r^   rt   r_   vswhere_pathvs_install_pathr   cmd_commandes           r2   _run_command_in_dev_promptr      s   6 35NOOPP'( 
 
 L!!((** 	>#$<=== %."  
 
 
  	  	L#$JKKK D11I=G.XYYM""))++ 	S#$Q-$Q$QRRR
 @FF'$ G 
 

 ~u#nTX
 
 
 	
 z<=   SZZ 
 
 		s   EE F(-6F##F(name	with_cudac	           
     V   t                      t                      g}	t          t                                }
t	          |
j                  }|
j        }t          rg d}g d}dd| | dg}ngg d}g d}dd	| d
g}|rU|t                      gz  }|d	                    t	          t          t                                dz                      dgz  }|d |D             z   }|d |D             z   }|d |D             z   }|	d |D             z   }|D ]z}|                    d	                    |                    dd                               |                    d	                    |                    dd                               {g }|                    d           |                    d	                    t          j                            dt          rdnd                               |                    d	                    d                    |                               |r|                    d	                    t	          t          t                                dz  dz                                 |                    d	                    d                    |                               |                    d 	                    d                    |                               |                    d!           |                    d"           t          r+|                    d#           |                    d$           n?|                    d%           |                    d&           |                    d'           |                    d!           |ri|                    d(           |                    d%           |                    d&           |                    d)           |                    d!           t          s|r|                    d*           |                    d+           |                    d!           |                    d,           |                    d-t"          j         d.           |                    d!           |                    d/           t          r|                    d0           n|                    d1           |                    d!           g }t'          t)          |                    D ]]\  }}d2| d3}|                    d4	                    ||                    dd                               |                    |           ^t'          t)          |                    D ]]\  }}d5| d3}|                    d6	                    ||                    dd                               |                    |           ^t          rd7nd8}t          s|rd9}d                    |          }|                    d:| d;|            |                    d!           |}t)          |                                          D ]o}d<| d3} | d=}!|                    d:|  d>|            |                    d?|!            |                    d@|            |                    d!           | }p|                    d:|  | dA|            |                    d!           nGd                    |          }"|                    d:|  | dA|"            |                    d!           |                    dB|  |            |                    d!           dC                    |          S )Dz<Generate the content of build.ninja for building the module.)
/std:c++17z/MDz/wd4819z/wd4251z/wd4244z/wd4267z/wd4275z/wd4018z/wd4190z/wd4624z/wd4067z/wd4068z/EHsc)
-Xcompilerr   z/O2z/DLLz	/LIBPATH:z.lib)
-std=c++17-fPIC-O2)r   r   r   r   z-sharedz-Lz	-ltvm_ffiz-L{}lib64z-lcudartc                6    g | ]}|                                 S  ri   .0flags     r2   
<listcomp>z)_generate_ninja_build.<locals>.<listcomp>  s     EEEtzz||EEEr3   c                6    g | ]}|                                 S r   r   r   s     r2   r   z)_generate_ninja_build.<locals>.<listcomp>  s     (T(T(T$(T(T(Tr3   c                6    g | ]}|                                 S r   r   r   s     r2   r   z)_generate_ninja_build.<locals>.<listcomp>  s      H H H$ H H Hr3   c                j    g | ]0}t          t          |                                                    1S r   r    r   resolve)r   r9   s     r2   r   z)_generate_ninja_build.<locals>.<listcomp>  s?     - - -&*DJJ  !!- - -r3   z-I{}:z$:zninja_required_version = 1.3zcxx = {}CXXclzc++zcflags = {}r]   z	nvcc = {}binrH   zcuda_cflags = {}zldflags = {} zrule compilez5  command = $cxx /showIncludes $cflags -c $in /Fo$outz  deps = msvcz  depfile = $out.dz  deps = gccz7  command = $cxx -MMD -MF $out.d $cflags -c $in -o $outzrule compile_cudazm  command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $outzrule merge_objectsz  command = ld -r -o $out $inzrule embed_cubinz  command = z[ -m tvm_ffi.utils.embed_cubin --output-obj $out --input-obj $in --cubin $cubin --name $namez	rule linkz-  command = $cxx $in /link $ldflags /out:$outz%  command = $cxx $in $ldflags -o $outcpp_z.ozbuild {}: compile {}cuda_zbuild {}: compile_cuda {}.dll.soz	unified.ozbuild z: merge_objects unified_with_.cubinz: embed_cubin z
  cubin = z	  name = z: link zdefault ra   )r
   r	   r   r   r    rN   stemrO   rr   r   rX   rc   replacerI   rJ   rK   rd   sys
executable	enumerater*   r+   )#r   r   r   r   r   r   r   r   r   default_include_pathstvm_ffi_libtvm_ffi_lib_pathtvm_ffi_lib_namedefault_cflagsdefault_cuda_cflagsdefault_ldflagscflagscuda_cflagsldflagsinclude_pathsr9   ninja	obj_filesicpp_pathobj_name	cuda_pathextunified_objobj_files_strcurrent_obj
cubin_namenext_obj
cubin_filelink_files_strs#                                      r2   _generate_ninja_buildr      s!    /002J2L2LM(())K;-.."' !
 
 
 BAA*(**%%%
 877JJJ$&=+;&=&={K 	$4$6$6#77c$'8'8"9"9G"CDDEE  O
 EEEEEEF%(T(TBS(T(T(TTK H H- H H HHG) - -.A- - - M
  C CfmmDLLd$;$;<<===6==c4)@)@AABBBB E	LL/000	LL""2:>>%9VQV#W#WXXYYY	LL%%chhv&6&677888 G[''D1B1B,C,Ce,Kf,T(U(UVVWWW'..sxx/D/DEEFFF	LL&&sxx'8'899::: 
LL	LL    PLMMM_%%%%)***^$$$NOOO	LL ())))***^$$${	
 	
 	
 	R  
 		LL-...LL8999LLLL+,,,LL Ks~  K  K  K   LL	LL >DEEEE<===	LL I 	!2!233 # #8!<<<+228X=M=McSW=X=XYYZZZ""""!&"4"455 # #9 1===077)BSBSTWY]B^B^__```"""" 
)&&EC  + !++JkJJ=JJKKKR " !1!1!3!344 	# 	#J5z555H&...J LLG(GG+GGHHHLL2j22333LL1Z11222LL"KK 	=d=C====>>>R ),,@d@C@@@@AAAR 
LL'D'#''(((	LL99Ur3   	build_dirc                &   ddg}t           j                            dd          }||d|gz  }t          rt	          || d          }nt          j        d|| d	          }|j        d
k    rd|j         g}t          rdnd}|j        r0|	                    d|j        
                    |                      |j        r0|	                    d|j        
                    |                      t          d                    |                    dS )z:Build the module in the given build directory using ninja.r   z-vMAX_JOBSNz-jT)r^   rt   r_   F)r`   r^   rt   r_   r   zninja exited with status oemr%   zstdout:
zstderr:
ra   )rI   rJ   rK   rO   r   re   rf   
returncoderg   rc   rh   stderrrS   rd   )r   r   num_workersrp   msgencodings         r2   build_ninjar     s)   oG*..T22KD+&& _+iX\]]]e'yY]^^^A>6+<>>?&355G= 	EJJC6=#7#7#A#ACCDDD= 	EJJC6=#7#7#A#ACCDDD499S>>*** r3   z\\z\"z\nz\rz\t)\"ra   	sc                6    |                      t                    S )z2Escape special characters for C++ string literals.)	translate_CPP_ESCAPE_TABLE)r   s    r2   _escape_cpp_string_literalr     s    ;;()))r3   sourceMapping[str, str]c                .   dddddd| g}|                                 D ]N\  }}|                    d| d| d	           |r+t          |          }|                    d
| d| d           O|                    d           d                    |          S )z:Decorate the given source code with TVM FFI export macros.z%#include <tvm/ffi/container/tensor.h>z#include <tvm/ffi/dtype.h>z#include <tvm/ffi/error.h>z$#include <tvm/ffi/extra/c_env_api.h>z#include <tvm/ffi/function.h>r   zTVM_FFI_DLL_EXPORT_TYPED_FUNC(z, z);z"TVM_FFI_DLL_EXPORT_TYPED_FUNC_DOC(z, "z");ra   )itemsrc   r   rd   )r   r   sources	func_namefunc_docescaped_docs         r2   _decorate_with_tvm_ffir     s     	0$$.'
G  )00 ` `	8R	RRYRRRSSS 	`4X>>KNN^	^^k^^^___NN299Wr3   seqSequence[str] | str | Nonec                X    | g S t          | t                    r| gS t          |           S N)r'   r    rP   )r   s    r2   _str_seq2listr     s1    
{		C		 uCyyr3   Tbuild_directory	need_lockc
                (   d t          |          D             }
d t          |          D             }t          |
          }t          |          }|s|s
J d            |t          |          ng }|t          |          ng }|t          |          ng }|t          |          ng }|t          j                            dt          t          d                                                              }t          dd|
|i |||||	
  
        }t          |                                          |  d| z  }n!t          |          
                                }|                    dd	           |	rt          rt          d
          |	r7|	                                D ]"\  }}|| dz  }|                    |           #t!          | ||||||
||		  	        }|rt#          t          |dz                      nt%                      5  t'          t          |dz            |           t)          t          |                     t          rdnd}t          ||  | z  
                                          cddd           S # 1 swxY w Y   dS )z&Real implementation of build function.c                j    g | ]0}t          t          |                                                    1S r   r   r   rA   s     r2   r   z_build_impl.<locals>.<listcomp>  s0    NNNSa**++NNNr3   c                j    g | ]0}t          t          |                                                    1S r   r   r   s     r2   r   z_build_impl.<locals>.<listcomp>  s0    PPPc$q''//++,,PPPr3   z0Either cpp_files or cuda_files must be provided.NTVM_FFI_CACHE_DIR~/.cache/tvm-ffi_Tparentsexist_okz/CUBIN embedding is not yet supported on Windowsr   )	r   r   r   r   r   r   r   r   r   lockzbuild.ninjar   r   )r   rv   rP   rI   rJ   rK   r    r   
expanduserr8   r   mkdirrO   NotImplementedErrorr   write_bytesr   r   r   rD   r   )r   r   r   r   r   r   r   r   r   r   cpp_path_listcuda_path_listwith_cppr   extra_ldflags_listextra_cflags_listextra_cuda_cflags_listextra_include_paths_list	cache_dirsource_hashr   r   cubin_bytes
cubin_pathninja_sourcer   s                             r2   _build_implr    s1    ON]95M5MNNNMPPmJ6O6OPPPNM""H^$$ITyTT"TTT 0=0Im,,,r.:.F\***B8I8UT"3444[]<O<[t$7888ac JNN#6DAS<T<T<_<_<a<a8b8bcc	("$
 
 OO..00d3J3J[3J3JJ		))1133	OOD4O000  Uz U!"STTT  0'2'8'8':': 	0 	0#J"
%:%:%::J"";//// )&0(4!
 
 
L /8	J#i&())	*	*	*[]] ; ;S]233\BBBC	NN###"-ffI4.7799::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   A/JJJ)	cpp_sourcescuda_sourcesr   r   r   r   r   r   r   r  r	  .Mapping[str, str] | Sequence[str] | str | Nonec       	           t          |          }
d                    |
          }t          |
          }~
t          |          }d                    |          }t          |          }~|t          |          ng }|t          |          ng }|t          |          ng }|t          |          ng }|i }nKt	          |t
                    r|di}n1t	          |t                    rt          |          }nd |D             }|r!t          ||          }t          |i           }n t          |i           }t          ||          }|t          j
                            dt          t          d                                                              }t          ||dd||||||	
  
        }t          |                                          |  d| z  }n!t          |                                          }|                    dd	           t          |d
z                                            }t          |dz                                            }t#          t          |dz                      5  t%          ||           |rt%          ||           t'          | |r|gng |r|gng ||||t          |          d|	
  
        cddd           S # 1 swxY w Y   dS )a  Compile and build a C++/CUDA module from inline source code.

    This function compiles the given C++ and/or CUDA source code into a shared library. Both ``cpp_sources`` and
    ``cuda_sources`` are compiled to an object file, and then linked together into a shared library. It's possible to only
    provide cpp_sources or cuda_sources. The path to the compiled shared library is returned.

    The ``functions`` parameter is used to specify which functions in the source code should be exported to the tvm ffi
    module. It can be a mapping, a sequence, or a single string. When a mapping is given, the keys are the names of the
    exported functions, and the values are docstrings for the functions. When a sequence of string is given, they are
    the function names needed to be exported, and the docstrings are set to empty strings. A single function name can
    also be given as a string, indicating that only one function is to be exported.

    Extra compiler and linker flags can be provided via the ``extra_cflags``, ``extra_cuda_cflags``, and ``extra_ldflags``
    parameters. The default flags are generally sufficient for most use cases, but you may need to provide additional
    flags for your specific use case.

    The include dir of tvm ffi and dlpack are used by default for the compiler to find the headers. Thus, you can
    include any header from tvm ffi in your source code. You can also provide additional include paths via the
    ``extra_include_paths`` parameter and include custom headers in your source code.

    The compiled shared library is cached in a cache directory to avoid recompilation. The `build_directory` parameter
    is provided to specify the build directory. If not specified, a default tvm ffi cache directory will be used.
    The default cache directory can be specified via the `TVM_FFI_CACHE_DIR` environment variable. If not specified,
    the default cache directory is ``~/.cache/tvm-ffi``.

    Parameters
    ----------
    name
        The name of the tvm ffi module.
    cpp_sources
        The C++ source code. It can be a list of sources or a single source.
    cuda_sources
        The CUDA source code. It can be a list of sources or a single source.
    functions
        The functions in cpp_sources or cuda_source that will be exported to the tvm ffi module. When a mapping is
        given, the keys are the names of the exported functions, and the values are docstrings for the functions
        (use an empty string to skip documentation for specific functions). When a sequence or a single string is given, they are
        the functions needed to be exported, and the docstrings are set to empty strings. A single function name can
        also be given as a string. When cpp_sources is given, the functions must be declared (not necessarily defined)
        in the cpp_sources. When cpp_sources is not given, the functions must be defined in the cuda_sources. If not
        specified, no function will be exported.
    extra_cflags
        The extra compiler flags for C++ compilation.
        The default flags are:

        - On Linux/macOS: ['-std=c++17', '-fPIC', '-O2']
        - On Windows: ['/std:c++17', '/O2']

    extra_cuda_cflags
        The extra compiler flags for CUDA compilation.

    extra_ldflags
        The extra linker flags.
        The default flags are:

        - On Linux/macOS: ['-shared']
        - On Windows: ['/DLL']

    extra_include_paths
        The extra include paths.

    build_directory
        The build directory. If not specified, a default tvm ffi cache directory will be used. By default, the
        cache directory is ``~/.cache/tvm-ffi``. You can also set the ``TVM_FFI_CACHE_DIR`` environment variable to
        specify the cache directory.

    embed_cubin: Mapping[str, bytes], optional
        A mapping from CUBIN module names to CUBIN binary data. TVM-FFI provides a macro `TVM_FFI_EMBED_CUBIN(name)` to embed
        CUBIN data into the compiled shared library. The keys should match the names used in `TVM_FFI_EMBED_CUBIN(name)` calls
        in the C++ source code. The values are the CUBIN binary data bytes. The embedded CUBIN kernels can be accessed by
        the macro `TVM_FFI_EMBED_CUBIN_GET_KERNEL(name, kernel_name)` defined in the `tvm/ffi/extra/cuda/cubin_launcher.h` header.
        See the `examples/cubin_launcher` directory for examples how to use cubin launcher to launch CUBIN kernels in TVM-FFI.

    Returns
    -------
    lib_path: str
        The path to the built shared library.

    Example
    -------

    .. code-block:: python

        import torch
        from tvm_ffi import Module
        import tvm_ffi.cpp

        # define the cpp source code
        cpp_source = '''
             void add_one_cpu(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
               // implementation of a library function
               TVM_FFI_ICHECK(x.ndim() == 1) << "x must be a 1D tensor";
               DLDataType f32_dtype{kDLFloat, 32, 1};
               TVM_FFI_ICHECK(x.dtype() == f32_dtype) << "x must be a float tensor";
               TVM_FFI_ICHECK(y.ndim() == 1) << "y must be a 1D tensor";
               TVM_FFI_ICHECK(y.dtype() == f32_dtype) << "y must be a float tensor";
               TVM_FFI_ICHECK(x.size(0) == y.size(0)) << "x and y must have the same shape";
               for (int i = 0; i < x.size(0); ++i) {
                 static_cast<float*>(y.data_ptr())[i] = static_cast<float*>(x.data_ptr())[i] + 1;
               }
             }
        '''

        # compile the cpp source code and load the module
        lib_path: str = tvm_ffi.cpp.build_inline(
            name="hello",
            cpp_sources=cpp_source,
            functions="add_one_cpu",
        )

        # load the module
        mod: Module = tvm_ffi.load_module(lib_path)

        # use the function from the loaded module to perform
        x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
        y = torch.empty_like(x)
        mod.add_one_cpu(x, y)
        torch.testing.assert_close(x + 1, y)

    ra   Nr   c                    i | ]}|d S )r   r   )r   r   s     r2   
<dictcomp>z build_inline.<locals>.<dictcomp>  s    777Tb777r3   r   r   r   Tr   zmain.cppzcuda.cur   F)
r   r   r   r   r   r   r   r   r   r   )r   rd   rv   rP   r'   r    r   dictr   rI   rJ   rK   r   r   r8   r   r   r   rD   r  )r   r  r	  r   r   r   r   r   r   r   cpp_source_listr   r   cuda_source_listr   r   r   r   r   r  function_mapr  r  r   cpp_file	cuda_files                             r2   build_inliner  "  sa   J $K00O?++JO$$H$\22)),--K%&&I0=0Im,,,r.:.F\***B8I8UT"3444[]<O<[t$7888ac ')	Is	#	# 8!2	Iw	'	' 8I77Y777 H+JEE
,["==+J;;
,[,GG JNN#6DAS<T<T<_<_<a<a8b8bcc	("$
 
 OO..00d3J3J[3J3JJ		))1133	OOD4O000I
*335566HY*335566I	#i&())	*	* 
 
Xz*** 	1K000$,4xjj"&/7	{{R*4, 8	NN#
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   7AKKK)
r  r	  r   r   r   r   r   r   r   keep_module_aliver  r   c       
        R    t          t          | |||||||||	
  
        |
          S )a  Compile, build and load a C++/CUDA module from inline source code.

    This function compiles the given C++ and/or CUDA source code into a shared library. Both ``cpp_sources`` and
    ``cuda_sources`` are compiled to an object file, and then linked together into a shared library. It's possible to only
    provide cpp_sources or cuda_sources.

    The ``functions`` parameter is used to specify which functions in the source code should be exported to the tvm ffi
    module. It can be a mapping, a sequence, or a single string. When a mapping is given, the keys are the names of the
    exported functions, and the values are docstrings for the functions. When a sequence of string is given, they are
    the function names needed to be exported, and the docstrings are set to empty strings. A single function name can
    also be given as a string, indicating that only one function is to be exported.

    Extra compiler and linker flags can be provided via the ``extra_cflags``, ``extra_cuda_cflags``, and ``extra_ldflags``
    parameters. The default flags are generally sufficient for most use cases, but you may need to provide additional
    flags for your specific use case.

    The include dir of tvm ffi and dlpack are used by default for the compiler to find the headers. Thus, you can
    include any header from tvm ffi in your source code. You can also provide additional include paths via the
    ``extra_include_paths`` parameter and include custom headers in your source code.

    The compiled shared library is cached in a cache directory to avoid recompilation. The `build_directory` parameter
    is provided to specify the build directory. If not specified, a default tvm ffi cache directory will be used.
    The default cache directory can be specified via the `TVM_FFI_CACHE_DIR` environment variable. If not specified,
    the default cache directory is ``~/.cache/tvm-ffi``.

    Parameters
    ----------
    name
        The name of the tvm ffi module.
    cpp_sources
        The C++ source code. It can be a list of sources or a single source.
    cuda_sources
        The CUDA source code. It can be a list of sources or a single source.
    functions
        The functions in cpp_sources or cuda_source that will be exported to the tvm ffi module. When a mapping is
        given, the keys are the names of the exported functions, and the values are docstrings for the functions
        (use an empty string to skip documentation for specific functions). When a sequence or a single string is given, they are
        the functions needed to be exported, and the docstrings are set to empty strings. A single function name can
        also be given as a string. When cpp_sources is given, the functions must be declared (not necessarily defined)
        in the cpp_sources. When cpp_sources is not given, the functions must be defined in the cuda_sources. If not
        specified, no function will be exported.
    extra_cflags
        The extra compiler flags for C++ compilation.
        The default flags are:

        - On Linux/macOS: ['-std=c++17', '-fPIC', '-O2']
        - On Windows: ['/std:c++17', '/O2']

    extra_cuda_cflags
        The extra compiler flags for CUDA compilation.

    extra_ldflags
        The extra linker flags.
        The default flags are:

        - On Linux/macOS: ['-shared']
        - On Windows: ['/DLL']

    extra_include_paths
        The extra include paths.

    build_directory
        The build directory. If not specified, a default tvm ffi cache directory will be used. By default, the
        cache directory is ``~/.cache/tvm-ffi``. You can also set the ``TVM_FFI_CACHE_DIR`` environment variable to
        specify the cache directory.

    embed_cubin
        A mapping from CUBIN module names to CUBIN binary data. When provided, the CUBIN data will be embedded
        into the compiled shared library using objcopy, making it accessible via the TVM_FFI_EMBED_CUBIN macro.
        The keys should match the names used in TVM_FFI_EMBED_CUBIN calls in the C++ source code.

    keep_module_alive
        Whether to keep the module alive. If True, the module will be kept alive
        for the duration of the program until libtvm_ffi.so is unloaded.

    Returns
    -------
    mod: Module
        The loaded tvm ffi module.

    See Also
    --------
    :py:func:`tvm_ffi.load_module`

    Example
    -------

    .. code-block:: python

        import torch
        from tvm_ffi import Module
        import tvm_ffi.cpp

        # define the cpp source code
        cpp_source = '''
             void add_one_cpu(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
               // implementation of a library function
               TVM_FFI_ICHECK(x.ndim() == 1) << "x must be a 1D tensor";
               DLDataType f32_dtype{kDLFloat, 32, 1};
               TVM_FFI_ICHECK(x.dtype() == f32_dtype) << "x must be a float tensor";
               TVM_FFI_ICHECK(y.ndim() == 1) << "y must be a 1D tensor";
               TVM_FFI_ICHECK(y.dtype() == f32_dtype) << "y must be a float tensor";
               TVM_FFI_ICHECK(x.size(0) == y.size(0)) << "x and y must have the same shape";
               for (int i = 0; i < x.size(0); ++i) {
                 static_cast<float*>(y.data_ptr())[i] = static_cast<float*>(x.data_ptr())[i] + 1;
               }
             }
        '''

        # compile the cpp source code and load the module
        mod: Module = tvm_ffi.cpp.load_inline(
            name="hello",
            cpp_sources=cpp_source,
            functions="add_one_cpu",
        )

        # use the function from the loaded module to perform
        x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
        y = torch.empty_like(x)
        mod.add_one_cpu(x, y)
        torch.testing.assert_close(x + 1, y)

    )
r   r  r	  r   r   r   r   r   r   r   r  )r   r  )r   r  r	  r   r   r   r   r   r   r   r  s              r2   load_inliner    sQ    R #%%/' 3+#	
 	
 	
 ,   r3   )r   r   r   r   r   r   r   c               2    t          | |||||||d	  	        S )a  Compile and build a C++/CUDA module from source files.

    This function compiles the given C++ and/or CUDA source files into a shared library. Both ``cpp_files`` and
    ``cuda_files`` are compiled to object files, and then linked together into a shared library. It's possible to only
    provide cpp_files or cuda_files. The path to the compiled shared library is returned.

    Note that this function does not automatically export functions to the tvm ffi module. You need to
    manually use the TVM FFI export macros (e.g., ``TVM_FFI_DLL_EXPORT_TYPED_FUNC``) in your source files to export
    functions. This gives you more control over which functions are exported and how they are exported.

    Extra compiler and linker flags can be provided via the ``extra_cflags``, ``extra_cuda_cflags``, and ``extra_ldflags``
    parameters. The default flags are generally sufficient for most use cases, but you may need to provide additional
    flags for your specific use case.

    The include dir of tvm ffi and dlpack are used by default for the compiler to find the headers. Thus, you can
    include any header from tvm ffi in your source files. You can also provide additional include paths via the
    ``extra_include_paths`` parameter and include custom headers in your source code.

    The compiled shared library is cached in a cache directory to avoid recompilation. The `build_directory` parameter
    is provided to specify the build directory. If not specified, a default tvm ffi cache directory will be used.
    The default cache directory can be specified via the `TVM_FFI_CACHE_DIR` environment variable. If not specified,
    the default cache directory is ``~/.cache/tvm-ffi``.

    Parameters
    ----------
    name
        The name of the tvm ffi module.
    cpp_files
        The C++ source files to compile. It can be a list of file paths or a single file path. Both absolute and
        relative paths are supported.
    cuda_files
        The CUDA source files to compile. It can be a list of file paths or a single file path. Both absolute and
        relative paths are supported.
    extra_cflags
        The extra compiler flags for C++ compilation.
        The default flags are:

        - On Linux/macOS: ['-std=c++17', '-fPIC', '-O2']
        - On Windows: ['/std:c++17', '/MD', '/O2']

    extra_cuda_cflags
        The extra compiler flags for CUDA compilation.
        The default flags are:

        - ['-Xcompiler', '-fPIC', '-std=c++17', '-O2'] (Linux/macOS)
        - ['-Xcompiler', '/std:c++17', '/O2'] (Windows)

    extra_ldflags
        The extra linker flags.
        The default flags are:

        - On Linux/macOS: ['-shared', '-L<tvm_ffi_lib_path>', '-ltvm_ffi']
        - On Windows: ['/DLL', '/LIBPATH:<tvm_ffi_lib_path>', '<tvm_ffi_lib_name>.lib']

    extra_include_paths
        The extra include paths for header files. Both absolute and relative paths are supported.

    build_directory
        The build directory. If not specified, a default tvm ffi cache directory will be used. By default, the
        cache directory is ``~/.cache/tvm-ffi``. You can also set the ``TVM_FFI_CACHE_DIR`` environment variable to
        specify the cache directory.

    Returns
    -------
    lib_path: str
        The path to the built shared library.

    Example
    -------

    .. code-block:: python

        import torch
        from tvm_ffi import Module
        import tvm_ffi.cpp

        # Assume we have a C++ source file "my_ops.cpp" with the following content:
        # ```cpp
        # #include <tvm/ffi/container/tensor.h>
        # #include <tvm/ffi/dtype.h>
        # #include <tvm/ffi/error.h>
        # #include <tvm/ffi/extra/c_env_api.h>
        # #include <tvm/ffi/function.h>
        #
        # void add_one_cpu(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
        #   TVM_FFI_ICHECK(x.ndim() == 1) << "x must be a 1D tensor";
        #   DLDataType f32_dtype{kDLFloat, 32, 1};
        #   TVM_FFI_ICHECK(x.dtype() == f32_dtype) << "x must be a float tensor";
        #   TVM_FFI_ICHECK(y.ndim() == 1) << "y must be a 1D tensor";
        #   TVM_FFI_ICHECK(y.dtype() == f32_dtype) << "y must be a float tensor";
        #   TVM_FFI_ICHECK(x.size(0) == y.size(0)) << "x and y must have the same shape";
        #   for (int i = 0; i < x.size(0); ++i) {
        #     static_cast<float*>(y.data_ptr())[i] = static_cast<float*>(x.data_ptr())[i] + 1;
        #   }
        # }
        #
        # TVM_FFI_DLL_EXPORT_TYPED_FUNC(add_one_cpu, add_one_cpu);
        # ```

        # compile the cpp source file and get the library path
        lib_path: str = tvm_ffi.cpp.build(
            name="my_ops",
            cpp_files="my_ops.cpp",
        )

        # load the module
        mod: Module = tvm_ffi.load_module(lib_path)

        # use the function from the loaded module
        x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
        y = torch.empty_like(x)
        mod.add_one_cpu(x, y)
        torch.testing.assert_close(x + 1, y)

    T)	r   r   r   r   r   r   r   r   r   )r  r   r   r   r   r   r   r   r   s           r2   buildr    s7    | !+#/'
 
 
 
r3   )r   r   r   r   r   r   r   r  c               N    t          t          | |||||||          |          S )a*  Compile, build and load a C++/CUDA module from source files.

    This function compiles the given C++ and/or CUDA source files into a shared library and loads it as a tvm ffi
    module. Both ``cpp_files`` and ``cuda_files`` are compiled to object files, and then linked together into a shared
    library. It's possible to only provide cpp_files or cuda_files.

    Note that this function does not automatically export functions to the tvm ffi module. You need to
    manually use the TVM FFI export macros (e.g., :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC`) in your source files to export
    functions. This gives you more control over which functions are exported and how they are exported.

    Extra compiler and linker flags can be provided via the ``extra_cflags``, ``extra_cuda_cflags``, and ``extra_ldflags``
    parameters. The default flags are generally sufficient for most use cases, but you may need to provide additional
    flags for your specific use case.

    The include dir of tvm ffi and dlpack are used by default for the compiler to find the headers. Thus, you can
    include any header from tvm ffi in your source files. You can also provide additional include paths via the
    ``extra_include_paths`` parameter and include custom headers in your source code.

    The compiled shared library is cached in a cache directory to avoid recompilation. The `build_directory` parameter
    is provided to specify the build directory. If not specified, a default tvm ffi cache directory will be used.
    The default cache directory can be specified via the `TVM_FFI_CACHE_DIR` environment variable. If not specified,
    the default cache directory is ``~/.cache/tvm-ffi``.

    Parameters
    ----------
    name
        The name of the tvm ffi module.
    cpp_files
        The C++ source files to compile. It can be a list of file paths or a single file path. Both absolute and
        relative paths are supported.
    cuda_files
        The CUDA source files to compile. It can be a list of file paths or a single file path. Both absolute and
        relative paths are supported.
    extra_cflags
        The extra compiler flags for C++ compilation.
        The default flags are:

        - On Linux/macOS: ['-std=c++17', '-fPIC', '-O2']
        - On Windows: ['/std:c++17', '/MD', '/O2']

    extra_cuda_cflags
        The extra compiler flags for CUDA compilation.
        The default flags are:

        - ['-Xcompiler', '-fPIC', '-std=c++17', '-O2'] (Linux/macOS)
        - ['-Xcompiler', '/std:c++17', '/O2'] (Windows)

    extra_ldflags
        The extra linker flags.
        The default flags are:

        - On Linux/macOS: ['-shared', '-L<tvm_ffi_lib_path>', '-ltvm_ffi']
        - On Windows: ['/DLL', '/LIBPATH:<tvm_ffi_lib_path>', '<tvm_ffi_lib_name>.lib']

    extra_include_paths
        The extra include paths for header files. Both absolute and relative paths are supported.

    build_directory
        The build directory. If not specified, a default tvm ffi cache directory will be used. By default, the
        cache directory is ``~/.cache/tvm-ffi``. You can also set the ``TVM_FFI_CACHE_DIR`` environment variable to
        specify the cache directory.

    keep_module_alive
        Whether to keep the module alive. If True, the module will be kept alive
        for the duration of the program until libtvm_ffi.so is unloaded.

    Returns
    -------
    mod: Module
        The loaded tvm ffi module.

    See Also
    --------
    :py:func:`tvm_ffi.load_module`

    Example
    -------

    .. code-block:: python

        import torch
        from tvm_ffi import Module
        import tvm_ffi.cpp

        # Assume we have a C++ source file "my_ops.cpp" with the following content:
        # ```cpp
        # #include <tvm/ffi/container/tensor.h>
        # #include <tvm/ffi/dtype.h>
        # #include <tvm/ffi/error.h>
        # #include <tvm/ffi/extra/c_env_api.h>
        # #include <tvm/ffi/function.h>
        #
        # void add_one_cpu(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
        #   TVM_FFI_ICHECK(x.ndim() == 1) << "x must be a 1D tensor";
        #   DLDataType f32_dtype{kDLFloat, 32, 1};
        #   TVM_FFI_ICHECK(x.dtype() == f32_dtype) << "x must be a float tensor";
        #   TVM_FFI_ICHECK(y.ndim() == 1) << "y must be a 1D tensor";
        #   TVM_FFI_ICHECK(y.dtype() == f32_dtype) << "y must be a float tensor";
        #   TVM_FFI_ICHECK(x.size(0) == y.size(0)) << "x and y must have the same shape";
        #   for (int i = 0; i < x.size(0); ++i) {
        #     static_cast<float*>(y.data_ptr())[i] = static_cast<float*>(x.data_ptr())[i] + 1;
        #   }
        # }
        #
        # TVM_FFI_DLL_EXPORT_TYPED_FUNC(add_one_cpu, add_one_cpu);
        # ```

        # compile the cpp source file and load the module
        mod: Module = tvm_ffi.cpp.load(
            name="my_ops",
            cpp_files="my_ops.cpp",
        )

        # use the function from the loaded module
        x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
        y = torch.empty_like(x)
        mod.add_one_cpu(x, y)
        torch.testing.assert_close(x + 1, y)

    r  r  )r   r  )	r   r   r   r   r   r   r   r   r  s	            r2   loadr    sK    H !%/' 3+		
 		
 		
 ,   r3   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    )r9   r    r:   r    r   r#   )r   r    )r^   rs   rt   ru   r_   rv   r   rw   )r   r    r   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    )r   r    r   r#   )r   r    r   r    )r   r    r   r   r   r    )r   r   r   rs   )TN)r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rv   r   r   r   r    )r   r    r  r   r	  r   r   r
  r   r   r   r   r   r   r   r   r   r   r   r   r   r    )r   r    r  r   r	  r   r   r
  r   r   r   r   r   r   r   r   r   r   r   r   r  rv   r   r   )r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    )r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rv   r   r   )0__doc__
__future__r   	functoolsr5   rI   rL   re   r   collections.abcr   r   
contextlibr   pathlibr   typingr   tvm_ffi.libinfor	   r
   r   tvm_ffi.moduler   r   tvm_ffi.utilsr   platformrO   r8   rD   	lru_cacherX   rr   r   r   r   r    	maketransr   r   r   r   r  r  r  r  r  r   r3   r2   <module>r+     sU  " I H " " " " " "      				      



 - - - - - - - - " " " " " "             X X X X X X X X X X . . . . . . . . " " " " " "\W$
 /34 4 4 4 4n	 	 	 	    89 9 9 94< < < <P /3g g g g gT+ + + +, MM   * * * *
   2   $ .2M; M; M; M; M;f /3/3@D)-.2*.04"&.2M
 M
 M
 M
 M
 M
f /3/3@D)-.2*.04"&.2"W W W W W Wz -1-1)-.2*.04"&H H H H H H\ -1-1)-.2*.04"&"P P P P P P P Pr3   