
    `i2                        d dl Z d dlZd dlmc mZ d dlmZ d dlm	Z	 ddl
mZmZ ddlmZ  e j        e          Zdej        dej        d	efd
Zdej        dej        ded	dfdZdej        d	ej        fdZ e	eeef                              Zdej        d	dfdZdej        d	efdZdej        dedefdZdej        fdZ d Z!dS )    N)is_symbolic)
OrderedSet   )configir)Vxcomm_buffer_typereturnc                 6   t          |           }t          |t          j                  rdS |                                }t          |t          j                  rdS t          |t          j                  r#t          |                                          sdS dS )ze
    Check if an input can be realized as a comm buffer of the specified
    `comm_buffer_type`.
    TF)		_get_data
isinstancer   Loopsget_output_specCommBufferLayoutFlexibleLayoutr   	get_numel)r	   r
   datalayouts       q/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/comm_lowering.pycan_realize_as_comm_bufferr   7   s     Q<<D$!! t!!##F&"-.. t&"+,, [AQAQ5R5R t5    
group_namec                    |                                   t          |           }t          |t          j                  sJ |                                }t          |t          j                  rdS t          |t          j                  st          d| d          t          |
                                          rt          d| d          t          j        |||          |_        dS )z
    Realize an input as a comm buffer of the specified `comm_buffer_type`.

    Specifically, this realizes the underlying buffer if it's still unrealized
    and changes the layout of the buffer to `ir.CommBufferLayout`.
    NzOA buffer can only be realized as a comm buffer if it has `FlexibleLayout` (got ).zGA buffer with symbolic shape cannot be converted to a comm buffer (got )r   r
   r   )realizer   r   r   Bufferr   r   r   AssertionErrorr   r   r   )r	   r
   r   bufferr   s        r   realize_as_comm_bufferr    M   s    IIKKKq\\Ffbi(((((##%%F&"-.. fb/00 
4)/4 4 4
 
 	

 6##%%&& 
-"(- - -
 
 	

 ')  FMMMr   c                 V   t          | j        t          j                  rH| j                                        }t          |t          j        t          j        f          sJ |j        S t          | j        t          j                  r| j        j        S t          d| j         d          )Nz\Expect the data attr of a `TensorBox` to be either an `ir.BaseView` or `ir.StorageBox` (got r   )r   r   r   BaseViewunwrap_view
MutableBox
StorageBoxr   )r	   nodes     r   r   r   q   s    !&"+&& 
v!!##$bm <=====y	AFBM	*	* 
v{C89C C C
 
 	
r   c                     t                               t          t          j                  |                                 f           dS )z
    If a non-blocking collective is lowered as a blocking collective, the wait
    node in the original graph becomes useless and we can skip the lowering it.
    N)_bufs_to_skip_waitaddidr   graphget_namer	   s    r   mark_as_skip_waitr.      s3    
 BqwKK677777r   c                 j    t          t          j                  |                                 ft          v S N)r*   r   r+   r,   r(   r-   s    r   should_skip_waitr1      s#    qwKK&*<<<r   inp	reduce_opc                    ddl m} |                                 |                                 j        z  }t
          j        j        oB ||          o7t          | t          j
        j                  o|dv o|t
          j        j        k    S )Nr   )is_symm_mem_enabled_for_group)sum)#torch.distributed._symmetric_memoryr5   r   	get_dtypeitemsizer   _collectiveauto_selectr   r   CommBufferTypeSYMM_MEM#one_shot_all_reduce_threshold_bytes)r2   r3   r   r5   inp_sizes        r   $_should_lower_as_one_shot_all_reducer@      s     RQQQQQ}}!99H& 	O))*55	O&sB,=,FGG	O !	O *NNr   c           	          t          | t          j        j        |           t	          j        t          j        j        t          j                            t          j
        j        j        j        | ||                    S r0   )r    r   r<   r=   pytreetree_map	TensorBoxcreateFallbackKerneltorchopssymm_memone_shot_all_reducedefault)r2   r3   r   s      r   _one_shot_all_reducerL      sc    3 1 :JGGG?

  I2:		
 	
  r   c            	         	 t           j        j        j         n+# t          $ r t
                              d           Y d S w xY wddlmm	m
mm fd} t           j        j         | j                  dt          j        dt          dt          dt          j        ffd	            } | j                  dt          j        dt          dt          dt          j        ffd
            } | j                  fd            } | j                  fd            }dt          j        fd | j                  fd            } | j                  fd            } | j                  fd            } | j                  fd            } | j                  fd            }	 | j                  fd            }
 | j                  fd            } | j                  fd            } | t           j        j        j                  fd            } | j                  fd            }d S )NzRInductor support for distributed collectives depends on building torch.distributedr   )add_layout_constraintcloneconstrain_to_fx_stridescopy_register_loweringc                 2     |             |           S r0    )fnrN   rP   rR   s    r   register_comm_loweringz7register_comm_lowerings.<locals>.register_comm_lowering   s(    b"9:::  $$$r   r2   r3   r   r   c                    t          | ||          rt          | ||          S  |           } t          j        rJ|                                  t
          j        j                            | 	                                           t          j                            |           } t          j                            j        j        | ||           | S r0   )r@   rL   r    reorder_for_compute_comm_overlapr   r   r+   no_fuse_buffer_namesr)   r,   r   ExternKernelrequire_contiguous_AllReduce_Kernelcreate_inplaceall_reduce_rK   )r2   r3   r   c10drO   s      r   _all_reducez,register_comm_lowerings.<locals>._all_reduce   s    /Y
KK 	D'Y
CCC eCjj2 	= KKMMMG(,,S\\^^<<<o0055 	++$		
 	
 	
 
r   c                    t          | ||          r, | t          | ||                    }t          |           | S t          j                            |           } t          j                            j        j	        | ||           | S r0   )
r@   rL   r.   r   rZ   r[   r\   r]   r^   rK   )r2   r3   r   retr_   rQ   s       r   _all_reduce_z-register_comm_lowerings.<locals>._all_reduce_   s     0Y
KK 	%$S)Z@@ C c"""J o0055
++$		
 	
 	
 
r   c                 |    fd| D             } t           j                            j        j        | ||           | S )Nc                 &    g | ]} |          S rT   rT   ).0r2   rO   s     r   
<listcomp>zJregister_comm_lowerings.<locals>._all_reduce_coalesced.<locals>.<listcomp>   s!    ///%%**///r   r   _CollectiveKernelr]   all_reduce_coalesced_rK   )inputsr3   r   r_   rO   s      r   _all_reduce_coalescedz6register_comm_lowerings.<locals>._all_reduce_coalesced   sP    ///////
++&.		
 	
 	
 r   c                 `    t           j                            j        j        | ||           | S r0   rh   )rk   r3   r   r_   s      r   _all_reduce_coalesced_z7register_comm_lowerings.<locals>._all_reduce_coalesced_   s7    
++&.		
 	
 	
 r   c                     t          j        j        | |g|R  }t          |t           j                  sJ t           j                            |          S r0   )r   ri   create_out_of_placer   IRNoderD   rE   )kernelrk   argsr&   s       r   _create_out_of_placez5register_comm_lowerings.<locals>._create_out_of_place	  sM    #7NNNN$	*****|""4(((r   c                 4     j         j        | ||          S r0   )all_gather_into_tensorrK   )r2   
group_sizer   rt   r_   s      r   _all_gather_into_tensorz8register_comm_lowerings.<locals>._all_gather_into_tensor  s*    ##'/	
 
 	
r   c           	          t          j        t          j        j        t          j                            j        j        | ||                    S r0   )	rB   rC   r   rD   rE   ri   rp    all_gather_into_tensor_coalescedrK   )rk   rw   r   r_   s      r   !_all_gather_into_tensor_coalescedzBregister_comm_lowerings.<locals>._all_gather_into_tensor_coalesced  sG    L 445=	 
 
 	
r   c                d    t           j                            j        j        | |||           |S )N)out)r   ri   r]   all_gather_into_tensor_outrK   )r2   rw   r   r}   r_   s       r   _all_gather_into_tensor_outz<register_comm_lowerings.<locals>._all_gather_into_tensor_out#  s?    
+++3 	, 	
 	
 	
 
r   c                 6     j         j        | |||          S r0   )reduce_scatter_tensorrK   )r2   r3   rw   r   rt   r_   s       r   _reduce_scatter_tensorz7register_comm_lowerings.<locals>._reduce_scatter_tensor.  s-    ##&.
 
 	
r   c           
          t          j        t          j        j        t          j                            j        j        | |||                    S r0   )	rB   rC   r   rD   rE   ri   rp   reduce_scatter_tensor_coalescedrK   )rk   r3   rw   r   r_   s       r    _reduce_scatter_tensor_coalescedzAregister_comm_lowerings.<locals>._reduce_scatter_tensor_coalesced8  sJ    L 444< 	
 	
 		
r   c                 6     j         j        | |||          S r0   )all_to_all_singlerK   )r2   output_split_sizesinput_split_sizesr   rt   r_   s       r   _all_to_all_singlez3register_comm_lowerings.<locals>._all_to_all_singleE  s-    ##"*
 
 	
r   c                 v     |           } t           j                            j        j        | ||           | S r0   r   ri   r]   
broadcast_rK   )r2   srcr   r_   rO   s      r   
_broadcastz+register_comm_lowerings.<locals>._broadcastO  s@    eCjj
++O#S#z	
 	
 	
 
r   c                 `    t           j                            j        j        | ||           | S r0   r   )r2   r   r   r_   s      r   _broadcast_z,register_comm_lowerings.<locals>._broadcast_W  s3    
++O#S#z	
 	
 	
 
r   c                 T     t           j        j        j        j        | |||          S r0   )rG   rH   _dtensorshard_dim_alltoallrK   )r2   
gather_dim	shard_dimr   rt   s       r   _shard_dim_alltoallz4register_comm_lowerings.<locals>._shard_dim_alltoall^  s2    ##I19
 
 	
r   c                 ~    t          |           r| S t          j                            j        j        |            | S r0   )r1   r   _WaitKernelcreate_waitwait_tensorrK   )r2   r_   s    r   _wait_tensorz-register_comm_lowerings.<locals>._wait_tensorh  s;    C   	J
""4#3#;SAAA
r   )rG   rH   _c10d_functional
all_reduceAttributeErrorloginfoloweringrN   rO   rP   rQ   rR   r   rD   strr^   all_reduce_coalescedrj   rq   rv   rz   r~   r   r   r   	broadcastr   r   r   r   )rV   r`   rc   rl   rn   rx   r{   r   r   r   r   r   r   r   r   rt   rN   r_   rO   rP   rQ   rR   s                  @@@@@@@r   register_comm_loweringsr      sF   	"---    	
 	
 	
 	             % % % % % % % 9%DDO,, # 3 2<       -,2 D,--\&)7:	      .-* D566     76 D677    87)ry ) ) ) )
 D788
 
 
 
 
 98
 DABB	
 	
 	
 	
 CB	
 D;<<    =< D677
 
 
 
 
 87
 D@AA

 

 

 

 BA

 D233
 
 
 
 
 43
 DN++     ,+ DO,,    -, EI.ABB
 
 
 
 CB
 D,--    .-  s     $AA)"loggingrG   torch.utils._pytreeutils_pytreerB   torch._inductor.utilsr   torch.utils._ordered_setr    r   r   virtualizedr   	getLogger__name__r   rD   r<   boolr   r   r    rq   r   tupleintr(   r.   r1   r@   rL   r   rT   r   r   <module>r      s     $ $ $ $ $ $ $ $ $ - - - - - - / / / / / /               g!!T	|')'8	   ,!	|!')'8!FI!	! ! ! !H
 
") 
 
 
 
  1Zc3h022 8 8t 8 8 8 8=	 =d = = = =	"%36   
bl 
 
 
 
B B B B Br   