
    `i                         d Z ddlZddlmZmZ ddlmZ  ed           G d de                      Zedk    r ej	                     dS dS )	a  
Matrix multiplication example via `cuda.jit`.

Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella

Contents in this file are referenced from the sphinx-generated docs.
"magictoken" is used for markers as beginning and ending of example text.
    N)CUDATestCaseskip_on_cudasim)captured_stdoutz4cudasim doesn't support cuda import at non-top-levelc                   2     e Zd ZdZ fdZ fdZd Z xZS )
TestMatMulzo
    Text matrix multiplication using simple, shared memory/square, and shared
    memory/nonsquare cases.
    c                     t                      | _        | j                                         t                                                       d S N)r   _captured_stdout	__enter__supersetUpself	__class__s    }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/numba/cuda/tests/doc_examples/test_matmul.pyr   zTestMatMul.setUp   s;     / 1 1'')))    c                     | j                             d d d            t                                                       d S r	   )r
   __exit__r   tearDownr   s    r   r   zTestMatMul.tearDown   s:    &&tT4888r   c                 	   ddl mm ddl}ddl}j        fd            }|                    d                              ddg          }|                    ddg          }|	                    ddg          }
                    |          }
                    |          }
                    |          }	d}
|                    |j        d         |
d         z            }|                    |j        d         |
d         z            }||f} |||
f         |||	           |	                                }t          |           t          ||z             dj        fd	            }|                    d                              ddg          }|                    ddg          }|	                    ddg          }
                    |          }
                    |          }
                    |          }	f}
|                    |j        d         |
d         z            }|                    |j        d         |
d         z            }||f} |||
f         |||	           |	                                }t          |           t          ||z             d
}|                     |                    |||z  k              |           |                    d                              ddg          }|                    ddg          }|	                    ddg          }
                    |          }
                    |          }
                    |          }	f}
t#          |j        d         |j        d                   }t#          |j        d         |j        d                   }|                    ||
d         z            }|                    ||
d         z            }||f} |||
f         |||	           |	                                }t          |           t          ||z             d}|                     |                    |||z  k              |           dS )z/Test of matrix multiplication on various cases.r   )cudafloat32Nc                                          d          \  }}||j        d         k     rQ||j        d         k     rBd}t          | j        d                   D ]}|| ||f         |||f         z  z  }||||f<   dS dS dS )z2Perform square matrix multiplication of C = A * B.   r              N)gridshaperange)ABCijtmpkr   s          r   matmulz)TestMatMul.test_ex_matmul.<locals>.matmul)   s     99Q<<DAq171:~~!agaj..qwqz** - -A1QT7Qq!tW,,CC!Q$	 ~..r         )r(   r(   r   c                 @   j                             f          }j                             f          }                    d          \  }}j        j        }j        j        }j        j        }	 d          }
t          |	          D ]}d|||f<   d|||f<   || j        d         k     r,||z  z   | j        d         k     r| |||z  z   f         |||f<   ||j        d         k     r,||z  z   |j        d         k     r|||z  z   |f         |||f<   	                                 t                    D ]}|
|||f         |||f         z  z  }
	                                 ||j        d         k     r||j        d         k     r|
|||f<   dS dS dS )z
            Perform matrix multiplication of C = A * B using CUDA shared memory.

            Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella
            )r   dtyper   r   r   r   N)
sharedarrayr   	threadIdxxygridDimr   r   syncthreads)r    r!   r"   sAsBr/   r0   txtybpgr%   r#   r$   TPBr   r   s                r   fast_matmulz.TestMatMul.test_ex_matmul.<locals>.fast_matmulN   s    ""#s7"CCB""#s7"CCB99Q<<DAq!B!B,.C '"++C3ZZ # #2r6
2r6
qwqz>>rAG|qwqz&A&A!"1b1s7l?!3Br2vJqwqz>>rAG|qwqz&A&A!"2C<?!3Br2vJ   """ s 1 1A2b!e9r!R%y00CC   """"171:~~!agaj..!Q$ ~..r   z5fast_matmul incorrect for shared memory, square case.)msgs            z9fast_matmul incorrect for shared memory, non-square case.)numbar   r   numpymathjitarangereshapeoneszeros	to_deviceceilr   copy_to_hostprint
assertTrueallmax)r   nprA   r'   x_hy_hz_hx_dy_dz_dthreadsperblockblockspergrid_xblockspergrid_yblockspergridr9   r:   
grid_y_max
grid_x_maxr8   r   r   s                     @@@r   test_ex_matmulzTestMatMul.test_ex_matmul    s    	(''''''' 
	 	 	 	 
	 iimm##QF++ggq!foohh1vnnS!!nnS!!nnS!!"))CIaL?13E$EFF))CIaL?13E$EFF(/:.}o-.sC===  c


cCi 	'	 '	 '	 '	 '	 '	 
'	V iimm##QF++ggq!foohh1vnnS!!nnS!!nnS!!*))CIaL?13E$EFF))CIaL?13E$EFF(/:3M?23CcBBB  c


cCi FscCi/00c::: iinn$$aW--ggr1ghh1vnnS!!nnS!!nnS!!*1sy|44
1sy|44
))J1C$CDD))J1C$CDD(/:3M?23CcBBB  c


cCi JscCi/00c:::::r   )__name__
__module____qualname____doc__r   r   r[   __classcell__)r   s   @r   r   r      sq         
        
I; I; I; I; I; I; I;r   r   __main__)
r_   unittestnumba.cuda.testingr   r   numba.tests.supportr   r   r\   main r   r   <module>rg      s      < < < < < < < < / / / / / / GHHZ; Z; Z; Z; Z; Z; Z; IHZ;z zHMOOOOO r   