
    `ix                     $   d dl Z d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 dad Zd	 Zd
 Zd)dZd)dZd Zd)dZd Zd)dZd)dZd)dZd)dZd Zd Zd Zd Zd Zd Zd Zd Zd Z d*dZ!d Z"d  Z#d! Z$d+d$Z%d)d%Z&d,d'Z'd-d(Z(dS ).    N)linalg)_core)cublas)device)_util   c                      t           S N_batched_gesv_limit     _/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cupy/cublas.pyget_batched_gesv_limitr      s    r   c                 
    | a d S r
   r   )limits    r   set_batched_gesv_limitr      s    r   c                 	   t          j        | |           t          j        |            t          j        |            | j        |j        k    s| j        |j        dz   k    r(| j        dd         |j        d| j        dz
           k    st          d          t          j        | |          \  }}|j        dk    rt          j
        |j        |          S |dk    rd}n*|dk    rd}n!|d	k    rd
}n|dk    rd}nt          d          t          t          |dz             }t          t          |dz             }| j        dk    r!t          j        | j        dd                   nd}| j        d         }| j        |j        k    r|j        d         nd}	|j        }
| j        j        }|j        j        }t          j        |                     |||                              ddd          |          } t          j        |                    |||	                              ddd          |          }| j        j        |k    r|                                 } |j        j        |k    r|                                }|t-                      k    r4t/          j        d                    |t-                                           t5          j                    }|}||z  | j        z  }t          j        | j        j        | j        j        ||z  z   |t          j                  }|}||	z  |j        z  }t          j        |j        j        |j        j        ||z  z   |t          j                  }t          j
        ||ft          j                  }t          j
        |ft          j                  }t          j
        dt          j                  } ||||j        j        ||j        j        |j        j        |           t          j         ||            ||t          j!        ||	|j        j        ||j        j        |j        j        ||j"        j        |           |d         dk    rYd                    |j#                  }|d         dk     r|d                    |d                    z  }tI          j%        |          |                    ddd                              |
          &                    |d          S )a  Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

    Computes the solution to system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.
       NzEa must have (..., M, M) shape and b must have (..., M) or (..., M, K)r   fsdFcDzinvalid dtypegetrfBatchedgetrsBatched   dtypez/The matrix size ({}) exceeds the set limit ({}))r   z Error reported by {} in cuBLAS. z)The {}-th parameter had an illegal value.F)copy)'r   _assert_cupy_array_assert_stacked_2d_assert_stacked_squarendimshape
ValueErrorlinalg_common_typesizecupyempty	TypeErrorgetattrr   numpyproddataptrascontiguousarrayreshape	transposer%   r   warningswarnformatr   get_cublas_handleitemsizearangeuintpint323_check_cublas_info_array_if_synchronization_allowedCUBLAS_OP_Nctypes__name__r   LinAlgErrorastype)abr$   	out_dtypetgetrfgetrsbsnnrhsb_shape
a_data_ptr
b_data_ptrhandleldaa_stepa_arrayldbb_stepb_arraypivotdinfoinfomsgs                           r   batched_gesvr^      s    
Q"""	Q	 ### 
16		QVqvz11GCRCLAGKQVaZK000  	 /155E9v{{z!'9---||	#	#	#(((FA.//EFA.//E%&VaZZAGCRCL	!	!	!QB	A&AF**172;;DgGJJqyyQ22<<Q1EE%*	, 	, 	,AqyyQ55??1aHH%*	, 	, 	,AvzZFFHHvzZFFHH!####GfQ 6 8 899	; 	; 	; %''F
C1Wqz!Fk!&*afj6B;&> $
, , ,G
C4Z!*$Fk!&*afj6B;&> $
, , ,GJAwek222EJuEK000E;t5;///D	E&!W\%sEJNEJNBOOO	=eUKKK	E&&$aw|/?
*.',*C1A2G G GAw!||077GG7Q;;>EEtAwhOOOC %%%;;q!Q''0077	7NNNr   c                 $    t          | |d          S )zFinds the (smallest) index of the element with the maximum magnitude.

    Note: The result index is 1-based index (not 0-based index).
    amax	_iamaxminxouts     r   iamaxrf   s       
 QV$$$r   c                 $    t          | |d          S )zFinds the (smallest) index of the element with the minimum magnitude.

    Note: The result index is 1-based index (not 0-based index).
    aminra   rc   s     r   iaminrj   {   rg   r   c                 Z   | j         dk    r't          d                    | j                             | j        j        }|dk    rd}n*|dk    rd}n!|dk    rd}n|dk    rd	}nt          d
          t          t          d|z   |z             }t          j	                    }d}t          |||          \  }}	}
	  ||| j        | j        j        d|           t          j        ||
           n# t          j        ||
           w xY w||	}n |j        |k    rt          j        |	|           |S )Nr   !x must be a 1D array (actual: {})r   r   r   r   r   r   r   r   i)r)   r+   r;   r$   charr0   r1   r   r   r<   _setup_result_ptrr-   r4   r5   setPointerModer   elementwise_copy)rd   re   namer$   rJ   funcrS   result_dtype
result_ptrresult	orig_modes              r   rb   rb      sM   v{{<CCAFKKLLLGLE||	#	#	#(((637T>**D%''FL$5\%# %#!J	1VQVQVZJ777fi0000fi0000
{	l	"	"vs+++Js   8C, ,Dc                    | j         dk    r't          d                    | j                             | j        j        }|dk    rt
          j        }nH|dk    rt
          j        }n5|dk    rt
          j        }n"|dk    rt
          j	        }nt          d          t          j                    }|                                }t          |||          \  }}}	  ||| j        | j        j        d|           t          j        ||           n# t          j        ||           w xY w||}n |j        |k    rt'          j        ||           |S )z&Computes the sum of the absolute of x.r   rl   r   r   r   r   r   )r)   r+   r;   r$   rn   r   sasumdasumscasumdzasumr0   r   r<   lowerro   r-   r4   r5   rp   r   rq   	rd   re   r$   rs   rS   rt   ru   rv   rw   s	            r   asumr      F   v{{<CCAFKKLLLGLE|||	#|	#}	#}(((%''F;;==L$5\%# %#!J	1VQVQVZJ777fi0000fi0000
{	l	"	"vs+++J   D D"c           	         t          ||           |j        j        }|dk    rt          j        }nH|dk    rt          j        }n5|dk    rt          j        }n"|dk    rt          j        }nt          d          t          j
                    }t          || |          \  } }}	  |||j        ||j        j        d|j        j        d           t          j        ||           dS # t          j        ||           w xY w)z5Computes y += a * x.

    (*) y will be updated.
    r   r   r   r   r   r   N)_check_two_vectorsr$   rn   r   saxpydaxpycaxpyzaxpyr0   r   r<   _setup_scalar_ptrr-   r4   r5   rp   )rG   rd   yr$   rs   rS   a_ptrrw   s           r   axpyr      s    
 q!GLE|||	#|	#|	#|(((%''F+FAu==Aui1VQVUAFJ16:qAAAfi00000fi0000s   !*C" "C9c           	      "   | j         j        }|dk    rt          j        }n5|dk    rt          j        }n"|dv rt          d          t          d          t          | |           t          j                    }|}t          |||          \  }}}		  ||| j
        | j        j        d|j        j        d|           t          j        ||	           n# t          j        ||	           w xY w||}n |j         |k    rt          j        ||           |S )$Computes the dot product of x and y.r   r   FDz&Use dotu() or dotc() for complex dtyper   r   )r$   rn   r   sdotddotr0   r   r   r<   ro   r-   r4   r5   rp   r   rq   
rd   r   re   r$   rs   rS   rt   ru   rv   rw   s
             r   dotr      s    GLE||{	#{	$@AAA(((q!%''FL$5\%# %#!J	1VQVQVZAFJ:FFFfi0000fi0000
{	l	"	"vs+++Js   *C C'c           	      (   | j         j        }|dv rt          | ||          S |dk    rt          j        }n"|dk    rt          j        }nt          d          t          | |           t          j	                    }|}t          |||          \  }}}		  ||| j        | j        j        d|j        j        d|           t          j        ||	           n# t          j        ||	           w xY w||}n |j         |k    rt          j        ||           |S )r   fdre   r   r   r   r   )r$   rn   r   r   cdotuzdotur0   r   r   r<   ro   r-   r4   r5   rp   r   rq   r   s
             r   dotur      &   GLE}}1aS!!!!	#|	#|(((q!%''FL$5\%# %#!J	1VQVQVZAFJ:FFFfi0000fi0000
{	l	"	"vs+++J   *C C*c           	      (   | j         j        }|dv rt          | ||          S |dk    rt          j        }n"|dk    rt          j        }nt          d          t          | |           t          j	                    }|}t          |||          \  }}}		  ||| j        | j        j        d|j        j        d|           t          j        ||	           n# t          j        ||	           w xY w||}n |j         |k    rt          j        ||           |S )z+Computes the dot product of x.conj() and y.r   r   r   r   r   r   )r$   rn   r   r   cdotczdotcr0   r   r   r<   ro   r-   r4   r5   rp   r   rq   r   s
             r   dotcr     r   r   c                    | j         dk    r't          d                    | j                             | j        j        }|dk    rt
          j        }nH|dk    rt
          j        }n5|dk    rt
          j        }n"|dk    rt
          j	        }nt          d          t          j                    }|                                }t          |||          \  }}}	  ||| j        | j        j        d|           t          j        ||           n# t          j        ||           w xY w||}n |j        |k    rt'          j        ||           |S )z(Computes the Euclidean norm of vector x.r   rl   r   r   r   r   r   )r)   r+   r;   r$   rn   r   snrm2dnrm2scnrm2dznrm2r0   r   r<   r}   ro   r-   r4   r5   rp   r   rq   r~   s	            r   nrm2r   7  r   r   c                 $   |j         dk    r't          d                    |j                             |j        j        }|dk    rt
          j        }nH|dk    rt
          j        }n5|dk    rt
          j        }n"|dk    rt
          j	        }nt          d          t          j                    }t          || |          \  } }}	  |||j        ||j        j        d           t          j        ||           dS # t          j        ||           w xY w)	z1Computes x *= a.

    (*) x will be updated.
    r   rl   r   r   r   r   r   N)r)   r+   r;   r$   rn   r   sscaldscalcscalzscalr0   r   r<   r   r-   r4   r5   rp   )rG   rd   r$   rs   rS   r   rw   s          r   scalr   X  s   
 	v{{<CCAFKKLLLGLE|||	#|	#|	#|(((%''F+FAu==Aui1VQVUAFJ222fi00000fi0000s   C8 8Dc                    | j         dk    r't          d                    | j                             |j         dk    r't          d                    |j                             | j        |j        k    r-t          d                    | j        |j                            | j        |j        k    r-t          d                    | j        |j                            d S )Nr   rl   z!y must be a 1D array (actual: {})z1x and y must be the same size (actual: {} and {})z2x and y must be the same dtype (actual: {} and {}))r)   r+   r;   r-   r$   r0   )rd   r   s     r   r   r   t  s    v{{<CCAFKKLLLv{{<CCAFKKLLLv "F1616224 4 	4w!' !6!'17335 5 	5 r   c                    t          j        |           }|t          |t          j                  rR||j        |k    rt          j        g |          }n|}|j        j        }t          j	        | t           j
                   nyt          |t          j                  rP|j        |k    rt          j        g |          }n|}|j        j        }t          j	        | t           j                   nt          d          |||fS )Nr#   z(out must be either cupy or numpy ndarray)r   getPointerMode
isinstancer.   ndarrayr$   r/   r4   r5   rp   CUBLAS_POINTER_MODE_DEVICEr2   rC   CUBLAS_POINTER_MODE_HOSTr0   )rS   re   r$   moderv   ru   s         r   ro   ro     s     ((D
{jdl33{;#)u,,Z%000FFF[_
ff&GHHHH	C	'	' D9[5111FFF]'
ff&EFFFFBCCCvt##r   c                    t          ||          \  }}t          j        |           }t          |t          j                  r t          j        | t          j                   nt          j        | t          j                   |||fS r
   )	_get_scalar_ptrr   r   r   r.   r   rp   r   r   )rS   rG   r$   r   r   s        r   r   r     su    q%((HAu ((D!T\"" Gff&GHHHHff&EFFFeT>r   c                 (   t          | t          j                  r.| j        |k    rt          j        | |          } | j        j        }nGt          | t          j                  r| j        |k    st          j        | |          } | j        j        }| |fS )Nr#   )	r   r.   r   r$   arrayr4   r5   r2   rC   )rG   r$   r   s      r   r   r     s    !T\"" 7e
1E***A
1em,, 	,E1A1AAU+++Ae8Or   c                    |j         j        }|dk    rt          j        }nH|dk    rt          j        }n5|dk    rt          j        }n"|dk    rt          j        }nt          d          |j        dk    sJ |j        |j        cxk    rdk    sn J |j         |j         cxk    r|j         k    sn J |j	        \  }}	t          |           } | t          j        k    r|	|}}
n||	}}
|j	        d         |
k    sJ |j	        d         |k    sJ t          ||j                   \  }}t          ||j                   \  }}t          j                    }t          j        |          }t!          |t"          j                  st!          |t"          j                  rt!          |t"          j                  s t#          j        |          }|j        j        }t!          |t"          j                  s t#          j        |          }|j        j        }t          j        |t          j                   nt          j        |t          j                   	 |j        r5 ||| ||	||j        j        ||j        j        d||j        j        d           n|j        rn| t          j        k    r^| t          j        k    rt          j        } nt          j        }  ||| |	|||j        j        |	|j        j        d||j        j        d           nJ|                    d	          } ||| ||	||j        j        ||j        j        d||j        j        d           t          j        ||           d
S # t          j        ||           w xY w)zComputes y = alpha * op(a) @ x + beta * y

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.

    Note: ''y'' will be updated.
    r   r   r   r   r   r!   r   r   orderN)r$   rn   r   sgemvdgemvcgemvzgemvr0   r)   r*   _trans_to_cublas_oprB   r   r   r<   r   r   r.   r   r   r4   r5   rp   r   r   _f_contiguous_c_contiguousCUBLAS_OP_CCUBLAS_OP_Tr%   )transaalpharG   rd   betar   r$   rs   mrN   xlenylen	alpha_ptrbeta_ptrrS   rw   s                   r   gemvr     sk    GLE|||	#|	#|	#|(((6Q;;;;6QV    q      7ag((((((((((7DAq ((F###dd71:71:&uag66E9$T1733ND(%''F%f--I%&& 	G*T4<*H*H 	G%.. 	'Ju%%E
I$-- 	%:d##Dy}Hff&GHHHHff&EFFF1? 	*DAy!&*aQ16:q* * * *_ 
	*6+=!=!=+++++DAy!&*aQ16:q* * * * S!!ADAy!&*aQ16:q* * * 	fi00000fi0000s   C;M M0c                    |j         j        }|dk    rt          j        }n5|dk    rt          j        }n"|dv rt          d          t          d          |j        dk    sJ |j        |j        cxk    rdk    sn J |j         |j         cxk    r|j         k    sn J |j        \  }}|j        d         |k    sJ |j        d         |k    sJ t          j	                    }t          || |          \  } }	}
|j        j        |j        j        }}	 |j        r |||||	|d|d|j        j        |
  
         no|j        r |||||	|d|d|j        j        |
  
         nI|                    d	
          } |||||	|d|d|j        j        |
  
         t!          j        ||           t          j        ||
           dS # t          j        ||
           w xY w)DComputes a += alpha * x @ y.T

    Note: ''a'' will be updated.
    r   r   r   z#Use geru or gerc for complex dtypesr   r!   r   r   r   r   N)r$   rn   r   sgerdgerr0   r)   r*   r   r<   r   r4   r5   r   r   r%   r   rq   rp   r   rd   r   rG   r$   rs   r   rN   rS   r   rw   x_ptry_ptraas                 r   gerr     s   
 GLE||{	#{	$=>>>(((6Q;;;;6QV    q      7ag((((((((((7DAq71:????71:????%''F"3FE5"I"IE9i6:qvz5E
1? 	*DAy%E1afj!LLLL_ 	*DAy%E1afj!LLLLc""BDAy%E1bgk1MMM"2q)))fi00000fi0000s   BF: :Gc                    |j         j        }|dv rt          | |||          S |dk    rt          j        }n"|dk    rt          j        }nt          d          |j        dk    sJ |j        |j        cxk    rdk    sn J |j         |j         cxk    r|j         k    sn J |j        \  }}|j        d         |k    sJ |j        d         |k    sJ t          j
                    }t          || |          \  } }	}
|j        j        |j        j        }}	 |j        r |||||	|d|d|j        j        |
  
         no|j        r |||||	|d|d|j        j        |
  
         nI|                    d          } |||||	|d|d|j        j        |
  
         t#          j        ||           t          j        ||
           d	S # t          j        ||
           w xY w)
r   r   r   r   r   r!   r   r   r   N)r$   rn   r   r   cgeruzgerur0   r)   r*   r   r<   r   r4   r5   r   r   r%   r   rq   rp   r   s                 r   gerur     s   
 GLE}}5!Q"""	#|	#|(((6Q;;;;6QV    q      7ag((((((((((7DAq71:????71:????%''F"3FE5"I"IE9i6:qvz5E
1? 	*DAy%E1afj!LLLL_ 	*DAy%E1afj!LLLLc""BDAy%E1bgk1MMM"2q)))fi00000fi0000s   BF= =Gc                 b   |j         j        }|dv rt          | |||          S |dk    rt          j        }n"|dk    rt          j        }nt          d          |j        dk    sJ |j        |j        cxk    rdk    sn J |j         |j         cxk    r|j         k    sn J |j        \  }}|j        d         |k    sJ |j        d         |k    sJ t          j
                    }t          || |          \  } }	}
|j        j        |j        j        }}	 |j        r |||||	|d|d|j        j        |
  
         nI|                    d          } |||||	|d|d|j        j        |
  
         t!          j        ||           t          j        ||
           d	S # t          j        ||
           w xY w)
zKComputes a += alpha * x @ y.T.conj()

    Note: ''a'' will be updated.
    r   r   r   r   r!   r   r   r   N)r$   rn   r   r   cgerczgercr0   r)   r*   r   r<   r   r4   r5   r   r%   r   rq   rp   r   s                 r   gercr   8  s   
 GLE}}5!Q"""	#|	#|(((6Q;;;;6QV    q      7ag((((((((((7DAq71:????71:????%''F"3FE5"I"IE9i6:qvz5E1? 	*DAy%E1afj!LLLLc""BDAy%E1bgk1MMM"2q)))fi00000fi0000s   A/F F.Fc                 T   |j         j        }|dk    rt          j        }n"|dk    rt          j        }nt          d          |j        dk    sJ |j        |j        cxk    rdk    sn J |j         |j         cxk    r|j         k    sn J |j        \  }	}
|j        d         |
k    sJ |j        d         |
k    sJ |j        s|	                    d          }t          ||j                   \  }}t          ||j                   \  }}t          j                    }t          j        |          }t          |t          j                  st          |t          j                  rt          |t          j                  s t          j        |          }|j        j        }t          |t          j                  s t          j        |          }|j        j        }t          j        |t          j                   nt          j        |t          j                   |rt          j        }nt          j        }t          j                    }	  ||||
| ||j        j        |	|j        j        d||j        j        d           t          j        ||           n# t          j        ||           w xY w|S )	z)Computes y = alpha*A @ x + beta * y

    r   r   zComplex dtypes not supportedr!   r   r   r   r   )r$   rn   r   ssbmvdsbmvr0   r)   r*   r   r%   r   r   r<   r   r   r.   r   r   r4   r5   rp   r   r   CUBLAS_FILL_MODE_LOWERCUBLAS_FILL_MODE_UPPER)kr   rG   rd   r   r   r}   r$   rs   r   rN   r   r   rS   rw   uplos                   r   sbmvr   [  s    GLE|||	#|67776Q;;;;6QV    q      7ag((((((((((7DAq71:????71:????? FFF&uag66E9$T1733ND(%''F%f--I%&& 	G*T4<*H*H 	G%.. 	'Ju%%E
I$-- 	%:d##Dy}Hff&GHHHHff&EFFF -,,%''F1VT1a
Aqvz1qvz1	& 	& 	& 	fi0000fi0000Hs   4J J%c                    | dk    s| t           j        k    rt           j        } nh| dk    s| t           j        k    rt           j        } nE| dk    s| t           j        k    rt           j        } n"t	          d                    |                     | S )NNTHzinvalid trans (actual: {}))r   rB   r   r   r0   r;   )transs    r   r   r     s    ||u 222"	#&"444"	#&"444"4;;EBBCCCLr   c                     d }|t           j        t           j        fv r.| j        r| j        d         }n| j        r| j        d         }d|z
  }||fS )Nr   r   )r   rB   r   r   r*   r   )rG   r   lds      r   _decide_ld_and_transr     s\    	B#V%7888? 	BB_ 	BIEu9r   c                 b    |*| j         d         }| j        s|                     d          } | |fS )Nr   r   r   )r*   r   r%   )rG   rT   s     r   _change_order_if_necessaryr     s8    
{gaj 	"S!!Ac6Mr         ?        c                 	   |j         |j         cxk    rdk    sn J |j        |j        k    sJ |j        j        }|dk    rt          j        }nH|dk    rt          j        }n5|dk    rt          j        }n"|dk    rt          j        }nt          d          t          |           } t          |          }| t          j
        k    r|j        \  }	}
n
|j        \  }
}	|t          j
        k    r!|j        d         }|j        d         |
k    sJ n |j        d         }|j        d         |
k    sJ |t          j        |	|f|d
          }d}n)|j         dk    sJ |j        |	|fk    sJ |j        |k    sJ t          ||j                  \  }}t          ||j                  \  }}t          j                    }t          j        |          }t%          |t          j                  st%          |t          j                  rt%          |t          j                  s t          j        |          }|j        j        }t%          |t          j                  s t          j        |          }|j        j        }t          j        |t          j                   nt          j        |t          j                   t5          ||           \  }} t5          ||          \  }}|||j        ri	  ||| ||	||
||j        j        ||j        j        |||j        j        |	           t          j        ||           n# t          j        ||           w xY w|S |j        ro	  ||d|z
  d| z
  ||	|
||j        j        ||j        j        |||j        j        |           t          j        ||           n# t          j        ||           w xY w|S t;          ||          \  }}t;          ||          \  }}|}|j        s|                    d          }	  ||| ||	||
||j        j        ||j        j        |||j        j        |	           t          j        ||           n# t          j        ||           w xY w|j        st?          j         ||           |S )a  Computes out = alpha * op(a) @ op(b) + beta * out

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.
    op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
    op(b) = b.T.conj() if transb is 'H'.
    r!   r   r   r   r   r   r   r   Nr$   r   r   r   )!r)   r$   rn   r   sgemmdgemmcgemmzgemmr0   r   rB   r*   r.   r/   r   r   r<   r   r   r   r   r4   r5   rp   r   r   r   r   r   r   r%   r   rq   )r   transbrG   rH   re   r   r   r$   rs   r   r   rN   r   r   rS   rw   rT   rW   r   s                      r   gemmr     s    6QV    q      7agGLE|||	#|	#|	#|((( ((F ((F###w11w1###GAJwqzQGAJwqzQ
{j!QuC888x1}}}}yQF""""yE!!!!&uag66E9$T1733ND(%''F%f--I%&& 	G*T4<*H*H 	G%.. 	'Ju%%E
I$-- 	%:d##Dy}Hff&GHHHHff&EFFF&q&11KC&q&11KCK3; 	9VVVQ1iVZafj#x   %fi8888%fi8888J 	9VQZVQ1iVZafj#x   %fi8888%fi8888J'3//FAs'3//FAsA  HH3H1VVVQ1iSVZh
A	7 	7 	7 	fi0000fi0000 'q#&&&Js$    6L, ,M<N" "N96Q Q'c                    |j         |j         cxk    rdk    sn J |j        |j        k    sJ |j        j        }|dk    rt          j        }nH|dk    rt          j        }n5|dk    rt          j        }n"|dk    rt          j        }nt          d          t          |           } t          |          }| t          j
        k    r|j        \  }	}
n
|j        \  }
}	|t          j
        k    r|j        |	|
fk    sJ n|j        |
|	fk    sJ |t          j        |	|
f|d          }n)|j         dk    sJ |j        |	|
fk    sJ |j        |k    sJ t          ||j                  \  }}t          ||j                  \  }}t          j                    }t          j        |          }t%          |t          j                  st%          |t          j                  rt%          |t          j                  s t          j        |          }|j        j        }t%          |t          j                  s t          j        |          }|j        j        }t          j        |t          j                   nt          j        |t          j                   t5          ||           \  }} t5          ||          \  }}|||j        rh	  ||| ||	|
||j        j        |||j        j        ||j        j        |	           t          j        ||           n# t          j        ||           w xY w|S |j        rn	  ||d	| z
  d	|z
  |
|	||j        j        |||j        j        ||j        j        |
           t          j        ||           n# t          j        ||           w xY w|S t;          ||          \  }}t;          ||          \  }}|}|j        s|                    d
          }	  ||| ||	|
||j        j        |||j        j        ||j        j        |	           t          j        ||           n# t          j        ||           w xY w|j        st?          j         ||           |S )zComputes alpha * op(a) + beta * op(b)

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.
    op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
    op(b) = b.T.conj() if transb is 'H'.
    r!   r   r   r   r   r   Nr   r   r   )!r)   r$   rn   r   sgeamdgeamcgeamzgeamr0   r   rB   r*   r.   r/   r   r   r<   r   r   r   r   r4   r5   rp   r   r   r   r   r   r   r%   r   rq   )r   r   r   rG   r   rH   re   r$   rs   r   rN   r   r   rS   rw   rT   rW   r   s                     r   geamr     s    6QV    q      7agGLE|||	#|	#|	#|((( ((F ((F###w11w1###w1a&     w1a&    
{j!QuC888x1}}}}yQF""""yE!!!!&uag66E9$T1733ND(%''F%f--I%&& 	G*T4<*H*H 	G%.. 	'Ju%%E
I$-- 	%:d##Dy}Hff&GHHHHff&EFFF&q&11KC&q&11KCK3; 	9VVVQ9afj(AFJSX\1F F F %fi8888%fi8888J 	9VQvXqxAy!&*(AFJSX\1F F F %fi8888%fi8888J'3//FAs'3//FAsA  HH3H1VVVQ9afj#qvz3
A	7 	7 	7 	fi0000fi0000 'q#&&&Js$   <5L L+;M< <N5P) )Q r   c                 p   |j         dk    sJ d|j         cxk    rdk    sn J |j        |j        k    sJ |j        j        }|dk    rt          j        }nH|dk    rt          j        }n5|dk    rt          j        }n"|dk    rt          j        }nt          d          | dk    s| t          j	        k    rt          j	        } nE| d	k    s| t          j
        k    rt          j
        } n"t          d
                    |                     |j        \  }}| t          j	        k    r$|j        |dz
  t          |          z  dz   k    sJ n#|j        |dz
  t          |          z  dz   k    sJ |&|j        rd}	nd}	t#          j        ||f||	          }n1|j         dk    sJ |j        |j        k    sJ |j        |j        k    sJ t'          j                    }
|j        rS|j        s|                    d          } ||
d| z
  |||j        j        ||j        j        ||j        j        |
  
         n|j        s|                    d          }|}|j        s|                    d          } ||
| |||j        j        ||j        j        ||j        j        |
  
         |j        st3          j        ||           |S )znComputes diag(x) @ a or a @ diag(x)

    Computes diag(x) @ a if side is 'L', a @ diag(x) if side is 'R'.
    r!   r   r   r   r   r   r   LRzinvalid side (actual: {})r   NCr   r   )r)   r$   rn   r   sdgmmddgmmcdgmmzdgmmr0   CUBLAS_SIDE_LEFTCUBLAS_SIDE_RIGHTr+   r;   r*   r-   absr   r.   r/   r   r<   r%   r4   r5   r   r   rq   )siderG   rd   re   incxr$   rs   r   rN   r   rS   r   s               r   dgmmr  d  s   
 6Q;;;;!7agGLE|||	#|	#|	#|(((s{{df555&	 888'4;;DAABBB7DAqv&&&v!a%3t99,q000000v!a%3t99,q00000
{? 	EEEj!QuE:::x1}}}}yAG####yAG####%''F
 + 	"S!!AVQXq!QVZAFJX\1	 	 	 	  	"S!!A  	$s##AVT1aQ
DVZ	 	 	  	+"1c***Jr   c                 p   |j         dk    sJ |j        j        }|dk    rt          j        }nH|dk    rt          j        }n5|dk    rt          j        }n"|dk    rt          j        }nt          d          t          |           } | t          j
        k    r|j        \  }}	n
|j        \  }	}|t          j        ||f|d          }d	}n)|j         dk    sJ |j        ||fk    sJ |j        |k    sJ |rt          j        }
nt          j        }
t!          ||j                  \  }}t!          ||j                  \  }}t#          j                    }t          j        |          }t)          |t          j                  st)          |t          j                  rt)          |t          j                  s t          j        |          }|j        j        }t)          |t          j                  s t          j        |          }|j        j        }t          j        |t          j                   nt          j        |t          j                   t9          ||           \  }} t9          ||           \  }}|j        r|j        s(|                    d
          }d| z
  } |j        d         }	  ||d|
z
  | ||	||j        j        |||j        j        |           t          j        ||           n# t          j        ||           w xY w|j        s(|                    d          }|j        d         }d| z
  } |}|j        s|                    d          }	  |||
| ||	||j        j        |||j        j        |           t          j        ||           n# t          j        ||           w xY w|j        s||d<   |S )a"  Computes out := alpha*op1(a)*op2(a) + beta*out

    op1(a) = a if trans is 'N', op2(a) = a.T if transa is 'N'
    op1(a) = a.T if trans is 'T', op2(a) = a if transa is 'T'
    lower specifies  whether  the  upper  or  lower triangular
    part  of the  array  out  is to be  referenced
    r!   r   r   r   r   r   Nr   r   r  r   r   r   .) r)   r$   rn   r   ssyrkdsyrkcsyrkzsyrkr0   r   rB   r*   r.   zerosr   r   r   r   r<   r   r   r   r   r4   r5   rp   r   r   r   r   r%   r   )r   rG   re   r   r   r}   r$   rs   rN   r   r   r   r   rS   rw   rT   ldo_r   s                      r   syrkr    s    6Q;;;;GLE|||	#|	#|	#|(((&&E"""w11w1
{j!QuC888x1}}}}yQF""""yE!!!! -,,&uag66E9$T1733ND(%''F%f--I%&& 	G*T4<*H*H 	G%.. 	'Ju%%E
I$-- 	%:d##Dy}Hff&GHHHHff&EFFF%a//JC!#u--FC
  	S!!AIE'!*C	5DT5!QAFJ38<. . . !&)4444F!&)4444  	S!!A'!*CIE  	$s##A	5DuaAFJ38<. . . !&)4444F!&)4444  	CHJs   &,K( (K?)N N'r
   )F)Nr   r   )Nr   )Nr   r   F))r2   r   r9   r.   r   cupy_backends.cuda.libsr   	cupy.cudar   cupy.linalgr   r   r   r   r^   rf   rj   rb   r   r   r   r   r   r   r   r   ro   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   <module>r     s                  * * * * * *               
     
WO WO WOt% % % %% % % %  B   B1 1 16   :   :   :   B1 1 18
5 
5 
5$ $ $*  	 	 	>1 >1 >1B#1 #1 #1L"1 "1 "1J 1  1  1F2 2 2 2j	 	 	    Z Z Z ZzU U U Up9 9 9 9xT T T T T Tr   