
    )`i                        d Z ddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ej        d             Zd	ee         d
ej        dededef
dZdeddfdZdedej        dej        dedededdfdZdeee         ee         f         fdZdedee         ddfdZdedeee                  deee                  ddfdZdefdZdS )a3  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    N)SimpleNamespace)ListTuple   )gen_vllm_comm_module)register_custom_opc                  4   t                                                      t          dg d          dt          t                   dt
          j        dt          dt          dt          f
fd	            } t          d
dg          dt          dd ffd            }t          ddg          dt          dt          t          t                   t          t                   f         ffd            }t          dddg          dt          dt          t                   dd ffd            }t          dg d          dt          dt          t          t                            dt          t          t                            dd ffd            }t          dg           dt          ffd            }t          dg d          dt          dt
          j        dt
          j        dt          dt          dt          dd ffd             }t          | ||||||!          S )"Nzflashinfer::init_custom_ar)ipc_ptrs	rank_datarankfull_nvlink)mutates_argsr
   r   r   r   returnc                 4                         | |||          S N)init_custom_ar)r
   r   r   r   modules       k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/comm/vllm_ar.pyr   z,get_vllm_comm_module.<locals>.init_custom_ar    s     $$Xy$LLL    zflashinfer::disposefac                 2                         |            d S r   )dispose)r   r   s    r   r   z%get_vllm_comm_module.<locals>.dispose)   s    rr   z%flashinfer::get_graph_buffer_ipc_metac                 p                         |           \  }}t          |          t          |          fS r   )get_graph_buffer_ipc_metalist)r   output_bytesoutput_offsetsr   s      r   r   z7get_vllm_comm_module.<locals>.get_graph_buffer_ipc_meta-   s6    '-'G'G'K'K$nL!!4#7#777r   zflashinfer::register_bufferfake_ipc_ptrsc                 0                         | |          S r   )register_buffer)r   r   r   s     r   r    z-get_vllm_comm_module.<locals>.register_buffer2   s     %%b-888r   z"flashinfer::register_graph_buffersr   handlesoffsetsr"   r#   c                 6                         | ||           d S r   )register_graph_buffers)r   r"   r#   r   s      r   r%   z4get_vllm_comm_module.<locals>.register_graph_buffers8   s#     	%%b'7;;;;;r   zflashinfer::meta_sizec                  ,                                      S r   )	meta_size)r   s   r   r'   z'get_vllm_comm_module.<locals>.meta_sizeA   s    !!!r   zflashinfer::all_reduce)out
reg_bufferreg_buffer_sz_bytesinpr(   r)   r*   num_ctasc                 <                         | |||||           d S r   )
all_reduce)r   r+   r(   r)   r*   r,   r   s         r   r.   z(get_vllm_comm_module.<locals>.all_reduceE   s*     	"c3
4GRRRRRr   )r   r   r   r    r%   r'   r.   )
r   build_and_loadr   r   inttorchTensorboolr   r   )r   r   r   r    r%   r'   r.   r   s          @r   get_vllm_comm_moduler4      s"   !##2244F $EEE  Ms)M(-M<?MNRM	M M M M M	 M
 -TFCCCC D      DC ?tfUUU8c 8eDItCy4H.I 8 8 8 8 8 VU8 %T?4K  9C 9S	 9d 9 9 9 9 9 9 ,111  <<tCy/<48cO<	< < < < <	 <
 /bAAA"s " " " " " BA"  AAA  SS\S \S 	S
 !S S 
S S S S S	 S %";'5   r   ipc_tensorsr   r   r   r   c                 J    t                                          | |||          S r   )r4   r   )r5   r   r   r   s       r   r   r   ^   s+      !!00Yk  r   r   c                 H    t                                          |            d S r   )r4   r   r   s    r   r   r   f   s"    ""2&&&&&r   r+   r(   r)   r*   r,   c                 R    t                                          | |||||           dS )a  Performs an out-of-place all reduce.

    Args:
        fa: The handle to the custom all reduce.
        inp: The input tensor to all reduce.
        out: The output tensor to all reduce.
        reg_buffer: The register buffer to all reduce.
        reg_buffer_sz_bytes: The size of the register buffer.
        num_ctas: The number of CTAs to use for the all reduce.
        CTA upper bounds: 36. Generally, we can saturate the bandwidth even with small amount the SMs.
    N)r4   r.   )r   r+   r(   r)   r*   r,   s         r   r.   r.   j   s:    & %%
Cj"5x    r   c                 D    t                                          |           S r   )r4   r   r8   s    r   r   r      s    !!;;B???r   r   c                 F    t                                          | |          S r   )r4   r    )r   r   s     r   r    r       s    !!11"mDDDr   r"   r#   c                 L    t                                          | ||           d S r   )r4   r%   r!   s      r   r%   r%      s(     11"gwGGGGGr   c                  B    t                                                      S r   )r4   r'    r   r   r'   r'      s    !!++---r   )__doc__	functoolstypesr   typingr   r   r1   jit.commr   utilsr   cacher4   r0   r2   r3   r   r   r.   r   r    r%   r'   r>   r   r   <module>rF      s'         ! ! ! ! ! !          + + + + + + & & & & & & ? ? ?Dc',|;>MQ   ' ' ' ' ' '	 
 	
   
   0@U49d3i+?%@ @ @ @ @E EDI E$ E E E EHH49oH04T#YH	H H H H.3 . . . . . .r   