
    `iR#                     
   d dl Zd dlmZmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Z ed           G d de
                      Zedk    r e	j                     dS dS )    N)cudaint32int64float32float64)unittestCUDATestCaseskip_on_cudasim)configc                     t          j        d          }|dk    rd| d<   t          j        d           | d         | |<   d S )N   r   *       )r   gridsyncwarp)aryis     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/numba/cuda/tests/cudapy/test_warp_ops.pyuseful_syncwarpr      sB    	!AAvvAM*VCFFF    c                 d    t          j        d          }t          j        d||          }|| |<   d S Nr   r   r   r   	shfl_sync)r   idxr   vals       r   use_shfl_sync_idxr      s/    	!A
.Q
,
,CCFFFr   c                 d    t          j        d          }t          j        d||          }|| |<   d S r   )r   r   shfl_up_syncr   deltar   r   s       r   use_shfl_sync_upr"      s0    	!A

J5
1
1CCFFFr   c                 d    t          j        d          }t          j        d||          }|| |<   d S r   )r   r   shfl_down_syncr    s       r   use_shfl_sync_downr%      s0    	!A

j!U
3
3CCFFFr   c                 d    t          j        d          }t          j        d||          }|| |<   d S r   )r   r   shfl_xor_sync)r   xorr   r   s       r   use_shfl_sync_xorr)   !   s0    	!A

ZC
0
0CCFFFr   c                 d    t          j        d          }t          j        d|d          }|| |<   d S Nr   r   r   r   )r   intor   r   s       r   use_shfl_sync_with_valr-   '   s/    	!A
.T1
-
-CCFFFr   c                 n    t          j        d          }t          j        d| |                   }|||<   d S r   )r   r   all_syncary_inary_outr   preds       r   use_vote_sync_allr4   -   1    	!A=VAY//DGAJJJr   c                 n    t          j        d          }t          j        d| |                   }|||<   d S r   )r   r   any_syncr0   s       r   use_vote_sync_anyr8   3   r5   r   c                 n    t          j        d          }t          j        d| |                   }|||<   d S r   )r   r   eq_syncr0   s       r   use_vote_sync_eqr;   9   s1    	!A<
F1I..DGAJJJr   c                 \    t           j        j        }t          j        dd          }|| |<   d S )Nr   Tr   	threadIdxxballot_sync)r   r   ballots      r   use_vote_sync_ballotrB   ?   s+    Aj$//FCFFFr   c                 n    t          j        d          }t          j        d| |                   }|||<   d S r   )r   r   match_any_sync)r1   r2   r   rA   s       r   use_match_any_syncrE   E   s2    	!A VAY77FGAJJJr   c                 |    t          j        d          }t          j        d| |                   \  }}|r|nd||<   d S r+   )r   r   match_all_sync)r1   r2   r   rA   r3   s        r   use_match_all_syncrH   K   s?    	!A&z6!9==LFD&QGAJJJr   c                 (   t           j        j        }|dz  dk    rt          j        dd          }n\|dz  dk    rt          j        dd          }n=|dz  dk    rt          j        dd          }n|dz  d	k    rt          j        d
d          }|| |<   d S )N   r   Tr   """"   DDDD       r=   )arrr   rA   s      r   use_independent_schedulingrR   Q   s    A1uzz!*d33	
Q!!*d33	
Q!!*d33	
Q!!*d33CFFFr   c                 V    t           j        rdS t          j                    j        | k    S )NT)r   ENABLE_CUDASIMr   get_current_devicecompute_capability)ccs    r   _safe_cc_checkrX   ^   s*     Bt&((;rAAr   z2Warp Operations are not yet implemented on cudasimc                   (   e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Z ej         ed          d          d             Z ej         ed          d          d             Z ej         ed          d          d             Zd Zd ZdS )TestCudaWarpOperationsc                 
    t          j        d          t                    }d}t          j        |t          j                  } |d|f         |           |                     t          j        |dk                         d S )Nzvoid(int32[:])    dtyper   r   )r   jitr   npemptyr   
assertTrueallselfcompilednelemr   s       r   test_useful_syncwarpz+TestCudaWarpOperations.test_useful_syncwarpg   sv    -48,--o>>huBH---E3sby))*****r   c                     t          j        d          t                    }d}d}t          j        |t          j                  } |d|f         ||           |                     t          j        ||k                         d S Nvoid(int32[:], int32)r\   rJ   r]   r   )r   r_   r   r`   ra   r   rb   rc   )re   rf   rg   r   r   s        r   test_shfl_sync_idxz)TestCudaWarpOperations.test_shfl_sync_idxn   s~    4483445FGGhuBH---E3$$$scz**+++++r   c                 t    t          j        d          t                    }d}d}t          j        |t          j                  }t          j        |t          j                  }||d xx         |z  cc<    |d|f         ||           |                     t          j        ||k                         d S rj   )	r   r_   r"   r`   ra   r   arangerb   rc   re   rf   rg   r!   r   exps         r   test_shfl_sync_upz(TestCudaWarpOperations.test_shfl_sync_upv   s    4483445EFFhuBH---iRX...EFFuE3&&&scz**+++++r   c                 v    t          j        d          t                    }d}d}t          j        |t          j                  }t          j        |t          j                  }|d | xx         |z  cc<    |d|f         ||           |                     t          j        ||k                         d S rj   )	r   r_   r%   r`   ra   r   rn   rb   rc   ro   s         r   test_shfl_sync_downz*TestCudaWarpOperations.test_shfl_sync_down   s    4483445GHHhuBH---iRX...GeVGE3&&&scz**+++++r   c                 V    t          j        d          t                    }d}d}t          j        |t          j                  }t          j        |t          j                  |z  } |d|f         ||           |                     t          j        ||k                         d S )Nrk   r\      r]   r   )	r   r_   r)   r`   ra   r   rn   rb   rc   )re   rf   rg   r(   r   rp   s         r   test_shfl_sync_xorz)TestCudaWarpOperations.test_shfl_sync_xor   s    4483445FGGhuBH---iRX...4E3$$$scz**+++++r   c                 :   t           t          t          t          f}t	          j         d          t	          j        d          t	          j        t          j                  t	          j        t          j                  f}t          ||          D ]\  }} t          j        |d d          |f          t                    }d}t	          j
        ||j                  } |d|f         ||           |                     t	          j        ||k                         d S )Nl        r\   r]   r   )r   r   r   r   r`   pizipr   r_   r-   ra   r^   rb   rc   )re   typesvaluestypr   rf   rg   r   s           r   test_shfl_sync_typesz+TestCudaWarpOperations.test_shfl_sync_types   s    ugw.(2,, 1 1*RU##RZ%6%68E6** 	0 	0HC.txQQQ../EFFHE(5	222CHQXsC(((OOBF3#:..////	0 	0r   c                     t          j        d          t                    }d}t          j        |t          j                  }t          j        |t          j                  } |d|f         ||           |                     t          j        |dk                         d|d<    |d|f         ||           |                     t          j        |dk                         d S )Nvoid(int32[:], int32[:])r\   r]   r   r   rx   )	r   r_   r4   r`   onesr   ra   rb   rc   re   rf   rg   r1   r2   s        r   test_vote_sync_allz)TestCudaWarpOperations.test_vote_sync_all   s    7486778IJJbh///(5111E67+++w!|,,---r
E67+++w!|,,-----r   c                     t          j        d          t                    }d}t          j        |t          j                  }t          j        |t          j                  } |d|f         ||           |                     t          j        |dk                         d|d<   d|d<    |d|f         ||           |                     t          j        |dk                         d S )Nr   r\   r]   r   r   rM      )	r   r_   r8   r`   zerosr   ra   rb   rc   r   s        r   test_vote_sync_anyz)TestCudaWarpOperations.test_vote_sync_any   s    7486778IJJ%rx000(5111E67+++w!|,,---q	q	E67+++w!|,,-----r   c                 `    t          j        d          t                    }d}t          j        |t          j                  }t          j        |t          j                  } |d|f         ||           |                     t          j        |dk                         d|d<    |d|f         ||           |                     t          j        |dk                         d|d d <    |d|f         ||           |                     t          j        |dk                         d S )Nr   r\   r]   r   r   )	r   r_   r;   r`   r   r   ra   rb   rc   r   s        r   test_vote_sync_eqz(TestCudaWarpOperations.test_vote_sync_eq   s   7486778HII%rx000(5111E67+++w!|,,---q	E67+++w!|,,---qqq	E67+++w!|,,-----r   c                 .    t          j        d          t                    }d}t          j        |t          j                  } |d|f         |           |                     t          j        |t          j        d          k                         d S )Nvoid(uint32[:])r\   r]   r   r   )r   r_   rB   r`   ra   uint32rb   rc   rd   s       r   test_vote_sync_ballotz,TestCudaWarpOperations.test_vote_sync_ballot   s    .48-../CDDhuBI...E3sbi
&;&;;<<=====r   )   r   z-Matching requires at least Volta Architecturec                 |    t          j        d          t                    }d}t          j        |t          j                  dz  }t          j        |t          j                  }t          j        dd          } |d|f         ||           |                     t          j	        ||k                         d S )Nr   
   r]   rM   )iU  i  r   r   )
r   r_   rE   r`   rn   r   ra   tilerb   rc   )re   rf   rg   r1   r2   rp   s         r   test_match_any_syncz*TestCudaWarpOperations.test_match_any_sync   s     8486778JKK5111A5(5111g2A66E67+++w#~../////r   c                     t          j        d          t                    }d}t          j        |t          j                  }t          j        |t          j                  } |d|f         ||           |                     t          j        |dk                         d|d<    |d|f         ||           |                     t          j        |dk                         d S )Nr   r   r]   r   i  rJ   r   )	r   r_   rH   r`   r   r   ra   rb   rc   r   s        r   test_match_all_syncz*TestCudaWarpOperations.test_match_all_sync   s     8486778JKK%rx000(5111E67+++w,677888q	E67+++w!|,,-----r   z;Independent scheduling requires at least Volta Architecturec                 ,    t          j        d          t                    }t          j        dt          j                  }t          j        dd          } |d         |           |                     t          j        ||k                         d S )Nr   r\   r]   )rK   rL   rN   rP      r   r\   )	r   r_   rR   r`   ra   r   r   rb   rc   )re   rf   rQ   rp   s       r   test_independent_schedulingz2TestCudaWarpOperations.test_independent_scheduling   s     /48-../IJJhr+++gFJJscz**+++++r   c                     t           j        d             }t          j        dt          j                  } |d         |           t          j        dd          }t          j                            ||           d S )Nc                     t          j        d          }|dz  dk    rt          j                    | |<   d S t          j                    | |<   d S )Nr   rM   r   )r   r   
activemaskr?   r   s     r   use_activemaskz>TestCudaWarpOperations.test_activemask.<locals>.use_activemask   sJ    	!AA!|| ((! ((!r   r\   r]   r   )iUUUUl   *UU ru   )r   r_   r`   r   r   r   testingassert_equal)re   r   outexpecteds       r   test_activemaskz&TestCudaWarpOperations.test_activemask   s|    			) 		) 
		) hr+++uc""" 73R88

#.....r   c                 @   t           j        d             }t          j        dt          j                  } |d         |           t          j        d t          d          D             t          j                  }t          j                            ||           d S )Nc                 Z    t          j        d          }t          j                    | |<   d S )Nr   )r   r   lanemask_ltr   s     r   use_lanemask_ltz@TestCudaWarpOperations.test_lanemask_lt.<locals>.use_lanemask_lt  s%    	!A#%%AaDDDr   r\   r]   r   c                     g | ]
}d |z  dz
  S )rM   r    ).0r   s     r   
<listcomp>z;TestCudaWarpOperations.test_lanemask_lt.<locals>.<listcomp>  s     ???Q!|???r   )	r   r_   r`   r   r   asarrayranger   r   )re   r   r   r   s       r   test_lanemask_ltz'TestCudaWarpOperations.test_lanemask_lt  s    		& 	& 
	& hr+++s### :??U2YY???$&I/ / /

#.....r   N)__name__
__module____qualname__rh   rl   rq   rs   rv   r~   r   r   r   r   r   
skipUnlessrX   r   r   r   r   r   r   r   r   rZ   rZ   e   s       + + +, , ,, , ,, , ,, , ,	0 	0 	0	. 	. 	.
. 
. 
.. . .> > > X//HJ J0 0J J0 X//HJ J	. 	.J J	. X//() ), ,) ),/ / /*/ / / / /r   rZ   __main__)numpyr`   numbar   r   r   r   r   numba.cuda.testingr   r	   r
   
numba.corer   r   r   r"   r%   r)   r-   r4   r8   r;   rB   rE   rH   rR   rX   rZ   r   mainr   r   r   <module>r      s       6 6 6 6 6 6 6 6 6 6 6 6 6 6 F F F F F F F F F F                            ' ' '
 
 
B B B EFFj/ j/ j/ j/ j/\ j/ j/ GFj/Z zHMOOOOO r   