
    Pi                         d dl Z d dlZd dlmZ d dlmZ ej        dej        dej        fd            Z	ej        dej        dej        fd            Z
 G d de          Zdd
Zd ZdS )    N)Functioniters
BLOCK_SIZEc	                 
   t          j        d          }	t          j        d|          }
t          j        d|          }|
d d d f         |k     |d d d f         |k     z  }| |	|z  z   }t          j        ||
d d d f         |z  z   |d d d f         |z  z   |d          }t          j        |          D ]e}t          j        t          j        ||d          d          }t          j        ||d d d f         z
            }t          j        ||d          }|t          j        t          j	        |d                    z   }||d d d f         z
  }t          j        ||d          }t          j        t          j        ||d          d          }t          j        ||d d d f         z
            }t          j        ||d          }|t          j        t          j	        |d                    z   }||d d d f         z
  }t          j        ||d          }gt          j        |          }t          j        ||d          }||	|z  z   }t          j
        ||
d d d f         |z  z   |d d d f         |z  z   ||           d S )Nr   g    _©maskotheraxis           r   )tl
program_idarangeloadstatic_rangemaxwhereexplogsumstore)	input_ptr
output_ptrMNstride_bstride_mstride_nr   r   pid_boffs_moffs_nr   curr_input_ptr	log_alpha_col_maxexp_weights_colcol_lserow_maxexp_weights_rowrow_lseresult_alphacurr_output_ptrs                           u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/hyper_connections/triton_sinkhorn.pysinkhorn_kernel_forward_logr0      s    M!EYq*%%FYq*%%F111d7OaF47Oa$78D!11N48)CCfTSTSTSTWoX`F``gksxyyyI _U## 5 5&$	599BBB&WT111W-=!=>>(4#>>BF26/#B#B#BCCCaaa 00	HT9e44	 &$	599BBB&WQQQW-=!=>>(4#>>BF26/#B#B#BCCC4 00	HT9e44		6)$$L8D,44L 58#33OH_vaaag99F47Oh<VVXdkopppppp    c
                    t          j        d          }
t          j        d|	          }t          j        d|	          }|d d d f         |k     |d d d f         |k     z  }||
|z  z   }| |
|z  z   }t          j        ||d d d f         |z  z   |d d d f         |z  z   |d          }t          j        ||d d d f         |z  z   |d d d f         |z  z   |d          }t          j        ||d          }t          j        ||d          }t          j        |          D ]}t          j        t          j        |||z  d          d          }||d d d f         z
  }t          j        ||d          }t          j        t          j        |||z  d          d          }||d d d f         z
  }t          j        ||d          }||z  }||
|z  z   }t          j        ||d d d f         |z  z   |d d d f         |z  z   ||           d S )Nr   r   r   r   r
   r   )r   r   r   r   r   r   r   r   )grad_output_ptrr   grad_input_ptrr   r   r   r   r    r   r   r!   r"   r#   r   r.   curr_grad_output_ptralpha
grad_alphar&   row_sum_grad_alphacol_sum_grad_alpha
grad_inputcurr_grad_input_ptrs                          r/   sinkhorn_kernel_backward_logr<   0   si    M!EYq*%%FYq*%%F111d7OaF47Oa$78D 58#33O*UX-==GOfQQQWo&@@6$PQPQPQ'?U]C]]dhpstttE-qqq$w(0JJVTXZ[Z[Z[T[__gMggnrz}~~~J HT5#&&E$
C00J_U## 
5 
5  VBHT:3Es$K$KRSTTT"4QQQW"==
XdJ44
  VBHT:3Es$K$KRSTTT"4T111W"==
XdJ44

#J(58+;;H 6!!!T'?X#==tQQQwRZ@ZZ\fmqrrrrrrr1   c                   <    e Zd Zedd            Zed             ZdS )TritonSinkhornFunction   c                    |j         dd          \  }}t          ||          dk    rddlm}  |||          S |j         d d         }|                    d||                                          }|j         d         }t          j        |          }	t          dt          j	        t          ||                              }
t          |f         ||	|||                    d          |                    d          |                    d          ||
d	

  
         |                     |	           || _         |	j        g |||R  S )N   r   )log_domain_sinkhorn_knopps    r         r   r   	num_warps)shaper   hyper_connections.mHCv2rC   view
contiguoustorch
empty_liketritonnext_power_of_2r0   stridesave_for_backwardr   )ctxr%   r   r   r   rC   batch_shapelog_alpha_flatBoutputr   s              r/   forwardzTritonSinkhornFunction.forward[   sa    rss#1q!99s??KKKKKK..y%@@@ocrc*"Aq11<<>> #!.11V3C1II>>??
#QD)q!!!$$n&;&;A&>&>@U@UVW@X@X!	
 	
 	
 	
 	f%%%	v{.K..A....r1   c                    | j         \  }| j        }|j        \  }}}t          dt	          j        t          ||                              }|                                }t          j        |          }t          |f         |
                    |||          |||||                    d          |                    d          |                    d          ||d           |                    |          d fS )NrE   r   r   rF   rG   rH   )saved_tensorsr   rJ   r   rP   rQ   rM   rN   rO   r<   rL   rR   view_as)	rT   grad_outputrX   r   rW   r   r   r   r:   s	            r/   backwardzTritonSinkhornFunction.backwardx   s    #	,1aV3C1II>>??
 ",,..%f--
$aT*Q1%%qa  *"3"3A"6"6
8I8I!8L8L!		
 		
 		
 		
 !!+..44r1   Nr?   )__name__
__module____qualname__staticmethodrY   r^    r1   r/   r>   r>   Z   sM        / / / \/8 5 5 \5 5 5r1   r>   r?   c                     | j         r,	 t                              | |          S # t          $ r Y nw xY wddlm}  || |          S )Nr   )sinkhorn_knopps)r   )is_cudar>   apply	ExceptionrK   rf   )r%   r   rf   s      r/   triton_sinkhornrj      sq     	)//	5AAA 	 	 	D	 877777?9e4444s   $ 
11c                  j    	 dd l } t          j                                        S # t          $ r Y dS w xY w)Nr   F)rP   rN   cudais_availableImportError)rP   s    r/   is_triton_availablero      sF    z&&(((   uus   !$ 
22r_   )rN   rP   triton.languagelanguager   torch.autogradr   jit	constexprr0   r<   r>   rj   ro   rd   r1   r/   <module>ru      s           # # # # # #'q
 <'q 'q 'q 'q 'qR 's <'s 's 's 's 'sR45 45 45 45 45X 45 45 45l	5 	5 	5 	5    r1   