
    `i-                     h   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ  ej        d          dd            Z ej        d          dd            ZddZd dZdZ ej        dddde          Zd Z ej        dddde          Z ej        d          d!d            ZdS )"    )warnN)cublas)device)runtime)_util)_uarray	lu_factorFTc                 $    t          | ||          S )a  LU decomposition.

    Decompose a given two-dimensional square matrix into ``P * L * U``,
    where ``P`` is a permutation matrix,  ``L`` lower-triangular with
    unit diagonal elements, and ``U`` upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(lu, piv)`` where ``lu`` is a :class:`cupy.ndarray`
            storing ``U`` in its upper triangle, and ``L`` without
            unit diagonal elements in its lower triangle, and ``piv`` is
            a :class:`cupy.ndarray` storing pivot indices representing
            permutation matrix ``P``. For ``0 <= i < min(M,N)``, row
            ``i`` of the matrix was interchanged with row ``piv[i]``

    .. seealso:: :func:`scipy.linalg.lu_factor`
    )
_lu_factor)aoverwrite_acheck_finites      q/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cupyx/scipy/linalg/_decomp_lu.pyr	   r	      s    8 al333    luc                    t          | ||          \  }}|j        \  }}t          ||          }t          |          \  }	}
|rt	          |	d|dz
  |d           |	|
fS |j        j        dv rt          j        nt          j	        }t          j        t          j        |f|                    }t	          |d|dz
  |d           ||	|
fS )a  LU decomposition.

    Decomposes a given two-dimensional matrix into ``P @ L @ U``, where ``P``
    is a permutation matrix, ``L`` is a lower triangular or trapezoidal matrix
    with unit diagonal, and ``U`` is a upper triangular or trapezoidal matrix.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        permute_l (bool): If ``True``, perform the multiplication ``P @ L``.
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(P, L, U)`` if ``permute_l == False``, otherwise ``(PL, U)``.
            ``P`` is a :class:`cupy.ndarray` storing permutation matrix with
            dimension ``(M, M)``. ``L`` is a :class:`cupy.ndarray` storing
            lower triangular or trapezoidal matrix with unit diagonal with
            dimension ``(M, K)`` where ``K = min(M, N)``. ``U`` is a
            :class:`cupy.ndarray` storing upper triangular or trapezoidal
            matrix with dimension ``(K, N)``. ``PL`` is a :class:`cupy.ndarray`
            storing permuted ``L`` matrix with dimension ``(M, K)``.

    .. seealso:: :func:`scipy.linalg.lu`
    r      fFdtype)r   shapemin_cupy_split_lu_cupy_laswpr   charnumpyfloat32float64cupydiagones)r   	permute_lr   r   r   pivmnkLUr_dtypePs                r   r   r   ,   s    > K66GB8DAqAq		A"DAq Aq!A#sB'''1v#%8=D#8#8%--emIdiG44455Aq!A#sB'''1ayr   c           
         ddl m} t          j        |           } t	          j        |            | j        }|j        dk    r|j        }|j	        }n_|j        dk    r|j
        }|j        }nE|j        dk    r|j        }|j        }n+|j        dk    r|j        }|j        }nd}t!          |          |                     |d|           } |rE| j        j        dk    r5t          j        |                                           st+          d	          t-          j                    }t          j        d
t2          j                  }	| j        \  }
}t          j        t9          |
|          ft2          j                  } |||
|| j        j        |
          }t          j        ||          } |||
|| j        j        |
|j        j        |j        j        |	j        j                   t@          j!        s%|	d         dk     rt+          d|	d          z            |	d         dk    r tE          d|	d         z  tF          d           |d
z  }| |fS )Nr   cusolverfdFD>Only float32, float64, complex64 and complex128 are supported.ordercopy#array must not contain infs or NaNsr   r   z=illegal value in %d-th argument of internal getrf (lu_factor)z4Diagonal number %d is exactly zero. Singular matrix.   )
stacklevel)$cupy_backends.cuda.libsr.   r    asarrayr   
_assert_2dr   r   sgetrfsgetrf_bufferSizedgetrfdgetrf_bufferSizecgetrfcgetrf_bufferSizezgetrfzgetrf_bufferSizeNotImplementedErrorastypekindisfiniteall
ValueErrorr   get_cusolver_handleemptyr   int32r   r   intcdataptrr   is_hipr   RuntimeWarning)r   r   r   r.   r   getrfgetrf_bufferSizemsgcusolver_handledev_infor%   r&   ipiv
buffersize	workspaces                  r   r   r   [   st   000000QA	QGEzS#5	s		#5	s		#5	s		#5N!#&&&	c[::A 77<3t}Q'7'7';';'='=57 7 7 022Oz!5;///H7DAq:s1ayyl%*555D!!/1aQGGJ
:U333I 
E/1aQ	0B
)-*, , , > :hqkAoo 69A!E F F 	F	!qC{*q	: 	: 	: 	: 	AIDt9r   Cc           
         | j         sJ | j        \  }}t          ||          }|dk    rdnd}t          j        ||f|| j                  }t          j        ||f|| j                  }||z  }t          | ||||j        |||           ||fS )Nr1   r[   )r5   r   size)_f_contiguousr   r   r    rL   r   _kernel_cupy_split_lu_c_contiguous)LUr5   r%   r&   r'   r(   r)   r^   s           r   r   r      s    8DAqAq		AC<<CCSE
Aq6bh777A
Aq6bh777Aq5D"aAq14HHHHq6Mr   z
__device__ inline int get_index(int row, int col, int num_rows, int num_cols,
                                bool c_contiguous)
{
    if (c_contiguous) {
        return col + num_cols * row;
    } else {
        return row + num_rows * col;
    }
}
z6raw T LU, int32 M, int32 N, int32 K, bool C_CONTIGUOUSzraw T L, raw T Ua$  
    // LU: shape: (M, N)
    // L: shape: (M, K)
    // U: shape: (K, N)
    const T* ptr_LU = &(LU[0]);
    T* ptr_L = &(L[0]);
    T* ptr_U = &(U[0]);
    int row, col;
    if (C_CONTIGUOUS) {
        row = i / N;
        col = i % N;
    } else {
        row = i % M;
        col = i / M;
    }
    T lu_val = ptr_LU[get_index(row, col, M, N, false)];
    T l_val, u_val;
    if (row > col) {
        l_val = lu_val;
        u_val = static_cast<T>(0);
    } else if (row == col) {
        l_val = static_cast<T>(1);
        u_val = lu_val;
    } else {
        l_val = static_cast<T>(0);
        u_val = lu_val;
    }
    if (col < K) {
        ptr_L[get_index(row, col, M, K, C_CONTIGUOUS)] = l_val;
    }
    if (row < K) {
        ptr_U[get_index(row, col, K, N, C_CONTIGUOUS)] = u_val;
    }
    cupyx_scipy_linalg_split_lu)preamblec                     | j         \  }}|j         d         }d|k    r||k    r||k     sJ | j        s	| j        sJ t          ||||||| j        | |	  	         d S )Nr   r]   )r   ra   r_   _kernel_cupy_laswp)Ak1k2rX   incxr%   r&   r'   s           r   r   r      st    7DAq
1A77rRxxBFFF*?-ao---q!RT4!!LLLLLLr   zOint32 M, int32 N, int32 K1, int32 K2, raw I IPIV, int32 INCX, bool C_CONTIGUOUSzraw T Aa  
    // IPIV: 0-based pivot indices. shape: (K,)  (*) K > K2
    // A: shape: (M, N)
    T* ptr_A = &(A[0]);
    if (K1 > K2) return;
    int row_start, row_end, row_inc;
    if (INCX > 0) {
        row_start = K1; row_end = K2; row_inc = 1;
    } else if (INCX < 0) {
        row_start = K2; row_end = K1; row_inc = -1;
    } else {
        return;
    }
    int col = i;
    int row1 = row_start;
    while (1) {
        int row2 = IPIV[row1];
        if (row1 != row2) {
            int idx1 = get_index(row1, col, M, N, C_CONTIGUOUS);
            int idx2 = get_index(row2, col, M, N, C_CONTIGUOUS);
            T tmp       = ptr_A[idx1];
            ptr_A[idx1] = ptr_A[idx2];
            ptr_A[idx2] = tmp;
        }
        if (row1 == row_end) break;
        row1 += row_inc;
    }
    cupyx_scipy_linalg_laswplu_solvec                 x   ddl m} | \  }}t          j        |           t          j        |           t          j        |           |j        d         }||j        d         k    rt          d          |j        }	|	j	        dk    r|j
        }
nJ|	j	        dk    r|j        }
n7|	j	        dk    r|j        }
n$|	j	        dk    r|j        }
nd}t          |          |dk    rt          j        }n5|d	k    rt          j        }n"|d
k    rt          j        }nt          d          |                    |	dd          }|                    |j        dd          }|d	z  }|                    |	d|           }|r|j        j        dk    r5t+          j        |                                          st          d          |j        j        dk    r5t+          j        |                                          st          d          |j        d	k    rd	n|j        d	         }t3          j                    }t+          j        d	t8          j                  } |
|||||j        j        ||j        j        |j        j        ||j        j        
  
         t@          j!        s%|d         dk     rt          d|d          z            |S )a9  Solve an equation system, ``a * x = b``, given the LU factorization of ``a``

    Args:
        lu_and_piv (tuple): LU factorization of matrix ``a`` (``(M, M)``)
            together with pivot indices.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        trans ({0, 1, 2}): Type of system to solve:

            ========  =========
            trans     system
            ========  =========
            0         a x  = b
            1         a^T x = b
            2         a^H x = b
            ========  =========
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    .. seealso:: :func:`scipy.linalg.lu_solve`
    r   r-   zincompatible dimensions.r/   r0   r1   r2   r3   r   r8   zunknown transFr4   Tzarray must not contain infs or NaNs.
Note that when a singular matrix is given, unlike scipy.linalg.lu_factor, cupyx.scipy.linalg.lu_factor returns an array containing NaN.r7   r   z<illegal value in %d-th argument of internal getrs (lu_solve))"r:   r.   r   _assert_cupy_arrayr<   _assert_stacked_squarer   rJ   r   r   sgetrsdgetrscgetrszgetrsrE   r   CUBLAS_OP_NCUBLAS_OP_TCUBLAS_OP_CrF   rG   r    rH   rI   ndimr   rK   rL   r   rM   rO   rP   r   rQ   )
lu_and_pivbtransoverwrite_br   r.   r   rX   r%   r   getrsrU   r&   rV   rW   s                  r   rl   rl     s   > 100000JR	R   	R	 $$$
AAGAJ3444HEzS	s			s			s		N!#&&&zz"	!"	!")))	5%	0	0B;;tz4;88DAID	c[::A 	78=Cb(9(9(=(=(?(?34 4 4
 7<3t}Q'7'7';';'='=57 7 7 Vq[[agajA022Oz!5;///H 
E/

QQ	qvz
X]     
 > EhqkAoo 58@|D E E 	E Hr   )FT)FFT)r[   )r   FT)warningsr   r   r    	cupy.cudar   r   r   cupy.linalgr   cupyx.scipy.linalgr   
implementsr	   r   r   r   _device_get_indexElementwiseKernelr`   r   rf   rl    r   r   <module>r      s                                   & & & & & & K  4 4 4 ! 4< D+ + + +\8 8 8 8v	 	 	 	
  /.<!D ",=K& & & RM M M ,T+8 ):A! ! ! H J` ` `  ` ` `r   