
    `i?                         U d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlZddlZddlZddlZej                                        Zeoej                                        dk    Zer ej        d          ndZer ed           Zn ed           Z ed           Z ed	           Z ed
           Z ed           Z ed           Z ed           Z ed           Z ed           Z ed           Z ed           Z  ed           Z! ed           Z" ed           Z# ed           Z$ ed           Z% ed           Z&d Z'd Z(d Z)d Z*d Z+d Z, ed           Z-e.e/d<    ed            Z0e.e/d!<    ed"           Z1e.e/d#<    ed$           Z2e.e/d%<   eoe Z3e.e/d&<    ed'           Z4e.e/d(<   d) Z5d* Z6d+ Z7d, Z8 ed-           Z9e.e/d.<    ed/           Z:e.e/d0<    ed1           Z;e.e/d2<    ed3           Z9e.e/d.<    ed4           Z<e.e/d5<   er.	 ddl=Z>e>j        ?                                Z@n# eA$ r d6Z@d6ZY nw xY wd6Z@d6aBd7 ZCejD        d8             ZEejD        dKd:            ZFejD        d;             ZGdLd=ZHd> ZId? ZJd@ ZKdA ZLdB ZMdC ZN eM            ZO eN            ZPdDejQ        jR        dfdEZSdDejT        ejQ        jR        dfdFZUdG ZVdH ZWdI ZXdJ ZYesej                                        rJ dS dS )Mz>This file is allowed to initialize CUDA context when imported.    N)LazyVal
TEST_NUMBATEST_WITH_ROCM	TEST_CUDA
IS_WINDOWSIS_MACOS   zcuda:0c                      t           S N)r        w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/testing/_internal/common_cuda.py<lambda>r      s     r   c                      t           o<t          j        j                            t          j        dt                              S )N      ?device)r   torchbackendscudnnis_acceptabletensorCUDA_DEVICEr   r   r   r   r      s1    !wu~/C/Q/QRWR^_ajuRvRvRv/w/w r   c                  Z    t           r#t          j        j                                        ndS )Nr   )
TEST_CUDNNr   r   r   versionr   r   r   r   r      s"    z%XU^%9%A%A%C%C%CWX r   c                      t           j        j        rCt          d t           j        j                            d          d d         D                       ndS )Nc              3   4   K   | ]}t          |          V  d S r   int).0vs     r   	<genexpr>z<lambda>.<locals>.<genexpr>   s(      %W%Wc!ff%W%W%W%W%W%Wr   .r	   r   r   )r   r   hiptuplesplitr   r   r   r   r      sN    [`[h[l x%W%Wem6G6M6Mc6R6RSUTUSU6V%W%W%W W W Wrx r   c                      t           j                                        o!t           j                                        dk    S )N)      r   cudais_availableget_device_capabilityr   r   r   r   r      -    ej5577hEJ<\<\<^<^bh<h r   c                      t           j                                        o!t           j                                        dk    S )N)   r   r,   r   r   r   r   r      r0   r   c                      t           j                                        o!t           j                                        dk    S )N   r   r,   r   r   r   r   r      r0   r   c                      t           j                                        o!t           j                                        dk    S )N)r5   r*   r,   r   r   r   r   r       r0   r   c                      t           j                                        o!t           j                                        dk    S )N)   r   r,   r   r   r   r   r   !   r0   r   c                      t           j                                        o!t           j                                        dk    S Nr8   	   r,   r   r   r   r   r   "   r0   r   c                      t           j                                        o!t           j                                        dk    S N)r<   r   r,   r   r   r   r   r   #   r0   r   c                      t           j                                        o!t           j                                        dk    S N)
   r   r,   r   r   r   r   r   $   -    uz6688jUZ=]=]=_=_cj=j r   c                      t           j                                        o!t           j                                        dk    S )N)   r   r,   r   r   r   r   r   %   rB   r   c                      t           j                                        oOt           j                                        d         dk    o't           j                                        d         dk    S )Nr   rA      r,   r   r   r   r   r   '   sZ    %*1133 @
8X8X8Z8Z[\8]ac8c @j6688;a? r   c                      t           j                                        o&t           j                                        dv pt          S )N))r5   r	   )r8   r5   )r   r-   r.   r/   IS_THORr   r   r   r   r   )   s3    EJ3355}5:;[;[;];]aq;q;|u| r   c                      t           j                                        o!t           j                                        dk    S r:   r,   r   r   r   r   r   *   -    %*1133d
8X8X8Z8Z^d8d r   c                      t           j                                        o!t           j                                        dk    S r>   r,   r   r   r   r   r   +   rJ   r   c                      t           j                                        o!t           j                                        dk    S r@   r,   r   r   r   r   r   ,   s-    5:2244f9Y9Y9[9[_f9f r   c                    t           j                                        sdS t           j                            d          j        }t
          j                            d|          t          fd| D                       S )NFr-   /PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDEc              3       K   | ]}|v V  	d S r   r   )r!   archeffective_archs     r   r#   z+evaluate_gfx_arch_within.<locals>.<genexpr>5   s(      <<$t~%<<<<<<r   )	r   r-   r.   get_device_propertiesgcnArchNameosenvirongetany)	arch_listgcn_arch_namerQ   s     @r   evaluate_gfx_arch_withinrZ   .   sr    :""$$ uJ44V<<HMZ^^$UWdeeN <<<<)<<<<<<r   c                  $    t          g d          S )N)gfx940gfx941gfx942gfx950rZ   r   r   r   CDNA3OrLaterra   7   s    #$L$L$LMMMr   c                  $    t          ddg          S )Ngfx90ar^   r`   r   r   r   CDNA2OrLaterrd   :   s    #Xx$8999r   c                      t           r>g d} t          j                            dd          dk    r| g dz  } t	          |           S t
          rt           ot          S dS )Nrc   r^   gfx1100gfx1201r_   'TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL0gfx1101gfx1150gfx1151gfx1200F)r   rT   rU   rV   rZ   r   r   SM80OrLaterrX   s    r   *evaluate_platform_supports_flash_attentionrr   =   sj     3HHH	:>>CSIISPPEEEEI'	222 .~-+-5r   c                      t           r>g d} t          j                            dd          dk    r| g dz  } t	          |           S t
          rdS dS )Nrf   ri   rj   rk   TF)r   rT   rU   rV   rZ   r   rq   s    r   .evaluate_platform_supports_efficient_attentionrt   G   sb     3HHH	:>>CSIISPPEEEEI'	222 t5r   c                  6    t            ot          o
t          dk    S )Ni_ )r   rp   TEST_CUDNN_VERSIONr   r   r   *evaluate_platform_supports_cudnn_attentionrw   Q   s    QKQ5G55PQr   c                      t                      S r   )rr   r   r   r   r   r   T       :d:f:f r   !PLATFORM_SUPPORTS_FLASH_ATTENTIONc                      t                      S r   )rt   r   r   r   r   r   U   s    <j<l<l r   #PLATFORM_SUPPORTS_MEM_EFF_ATTENTIONc                      t                      S r   )rw   r   r   r   r   r   V   ry   r   !PLATFORM_SUPPORTS_CUDNN_ATTENTIONc                  ,    t           pt          pt          S r   )rz   r~   r|   r   r   r   r   r   X   s    :[ ;V2S;V2U r   !PLATFORM_SUPPORTS_FUSED_ATTENTIONPLATFORM_SUPPORTS_FUSED_SDPAc                      t           ot          S r   )r   rp   r   r   r   r   r   ^   s    y/H[ r   PLATFORM_SUPPORTS_BF16c                     t           j                                        rt           j        j        rsdg} t
          dk    r|                     dg           t
          dk    r|                     d           | D ]+}|t           j                            d          j	        v r dS ,n)t          p!t           j                                        dk    S d	S )
Ngfx94)r2   r+   gfx120)r2   r*   gfx95r   Tr;   F)r   r-   r.   r   r&   ROCM_VERSIONextendappendrR   rS   SM90OrLaterr/   archsrP   s     r   evaluate_platform_supports_fp8r   `   s    z   O= 
	OIEv%%hZ(((v%%W%%%    5:;;A>>JJJ44 K  N%*"B"B"D"D"NN5r   c                  *   t           j                                        rtt           j        j        rTdt           j                                        vrdS dg} | D ]+}|t           j                            d          j        v r dS ,nt          ot           S dS )NUSE_FBGEMM_GENAIFr^   r   T)r   r-   r.   r   r&   
__config__showrR   rS   r   SM100OrLaterr   s     r   +evaluate_platform_supports_fp8_grouped_gemmr   o   s    z   	4= 	4!)9)>)>)@)@@@uJE    5:;;A>>JJJ44 K  3|#335r   c                      t           j                                        rJt           j        j        r2t
          dk    r&dt           j                            d          j        v S nt          S dS )Nr4   r_   r   F)	r   r-   r.   r   r&   r   rR   rS   r   r   r   r   "evaluate_platform_supports_mx_gemmr   |   s`    z    = 	 v%%5:#C#CA#F#F#RRR &  5r   c                      t           j                                        r:t           j        j        s)dt           j                                        v } | ot          S dS )Nr   F)r   r-   r.   r   r&   r   r   IS_SM100)built_with_fbgemm_genais    r   -evaluate_platform_supports_mxfp8_grouped_gemmr      sN    z   4): 4"48H8M8M8O8O"O&3835r   c                      t                      S r   )r   r   r   r   r   r      s    2T2V2V r   PLATFORM_SUPPORTS_MX_GEMMc                      t                      S r   )r   r   r   r   r   r      s    .L.N.N r   PLATFORM_SUPPORTS_FP8c                      t                      S r   )r   r   r   r   r   r      s    ;f;h;h r   "PLATFORM_SUPPORTS_FP8_GROUPED_GEMMc                      t           ot          S r   )r   r   r   r   r   r   r      s    )2L r   c                      t                      S r   )r   r   r   r   r   r      s    =j=l=l r   $PLATFORM_SUPPORTS_MXFP8_GROUPED_GEMMFc                      t           s
J d            t          sKt          t          j                                                  D ]} t          j        dd|             dad S d S )Nz?CUDA must be available when calling initialize_cuda_context_rngrF   zcuda:r   T)r   __cuda_ctx_rng_initializedranger   r-   device_countrandn)is    r   initialize_cuda_context_rngr      sw    WWWWW9% *uz..0011 	/ 	/AK+!++.....%)"""	* *r   c               #   v  K   t           j        j        j        j        } 	 dt           j        j        j        _        t           j        j                            d d d d          5  d V  d d d            n# 1 swxY w Y   | t           j        j        j        _        d S # | t           j        j        j        _        w xY w)NFenabled	benchmarkdeterministic
allow_tf32r   r   r-   matmulr   r   flagsold_allow_tf32_matmuls    r   tf32_offr      s      !N/6AF05"-^!''TXej'kk 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 1F"---0E"-EEEE0   AB "A3'B 3A77B :A7;B B8h㈵>c              #     K   t           j        j        r/t          j                            dd           }dt          j        d<   t           j        j        j        j	        }| j
        }	 dt           j        j        j        _	        || _
        t           j        j                            d d d d          5  d V  d d d            n# 1 swxY w Y   t           j        j        r||t          j        d<   nt          j        d= |t           j        j        j        _	        || _
        d S # t           j        j        r||t          j        d<   nt          j        d= |t           j        j        j        _	        || _
        w xY w)NHIPBLASLT_ALLOW_TF321Tr   )r   r   r&   rT   rU   rV   r   r-   r   r   	precisionr   r   )selftf32_precisionhip_allow_tf32r   old_precisions        r   tf32_onr      s     } 1(>EE-0
)*!N/6ANM'04"-'^!''TXei'jj 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 = 	7)5C
122J560E"-& = 	7)5C
122J560E"-&&&&&s2   &A
D  0C5D  CD  C	D   AE4c               #   v  K   t           j        j        j        j        } 	 dt           j        j        j        _        t           j        j                            dddd          5  dV  ddd           n# 1 swxY w Y   | t           j        j        j        _        dS # | t           j        j        j        _        w xY w)z
    Context manager to temporarily enable TF32 for CUDA operations.
    Restores the previous TF32 state after exiting the context.
    TNr   r   r   s    r   tf32_enabledr      s       "N/6AF04"-^!''D ( 
 
 	 	 EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 1F"---0E"-EEEEr   Tc                 ,     d  fdfd}|S )Nc                 h    t                      5   |             d d d            d S # 1 swxY w Y   d S r   r   )r   function_calls     r   with_tf32_disabledz+tf32_on_and_off.<locals>.with_tf32_disabled   s{    ZZ 	 	MOOO	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   '++c                 n    t          |           5   |             d d d            d S # 1 swxY w Y   d S r   )r   )r   r   r   s     r   with_tf32_enabledz*tf32_on_and_off.<locals>.with_tf32_enabled   s    T>** 	 	MOOO	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   *..c                      t          j                   j        }t          |                                          t          j                    fd            }|S )Nc                                          t          |                      t          j                                        o}dv r%|o"t          j        d                   j        dk    }dv r"|od         t          j        t          j        hv }|r. d         fd            d         fd           d S  di  d S )Nr   r-   dtyper   c                        di S Nr   r   fkwargss   r   r   zCtf32_on_and_off.<locals>.wrapper.<locals>.wrapped.<locals>.<lambda>  s    11;;v;; r   c                        di S r   r   r   s   r   r   zCtf32_on_and_off.<locals>.wrapper.<locals>.wrapped.<locals>.<lambda>  s    !!++f++ r   r   )	updatezipr   r-   is_tf32_supportedr   typefloat32	complex64)argsr   cond	arg_namesr   only_ifr   r   s    ` r   wrappedz1tf32_on_and_off.<locals>.wrapper.<locals>.wrapped  s    MM#i..///://11=gD6!!OfX.>!?!?!D!N&  UU]EO4T!T ""6&>3F3F3F3F3FGGG!!&.2E2E2E2E2EFFFFFFr   )inspect	signature
parametersr'   keys	functoolswraps)r   paramsr   r   r   r   r   s   `  @r   wrapperz tf32_on_and_off.<locals>.wrapper  sv    "1%%0&++--((					 	 	 	 	 	 	 	 
		 r   r   )r   r   r   r   r   s   `` @@r   tf32_on_and_offr      sX                & Nr   c                 F     t          j                    fd            }|S )Nc                  d    t                      5   | i |cd d d            S # 1 swxY w Y   d S r   r   )r   r   r   s     r   r   zwith_tf32_off.<locals>.wrapped  s    ZZ 	& 	&1d%f%%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   %)))r   r   )r   r   s   ` r   with_tf32_offr     s8    _Q& & & & & Nr   c                     dt           j                                        vrdS t           j                                                            d          } t           j                                        | t	          d          z   d                              d          d         }t          d |                    d          D                       S )NMagmar%   zMagma 
r   c              3   4   K   | ]}t          |          V  d S r   r   r!   xs     r   r#   z%_get_magma_version.<locals>.<genexpr>)  s(      88AQ888888r   r$   )r   r   r   findlenr(   r'   )positionversion_strs     r   _get_magma_versionr   $  s    e&++----v$$&&++H55H"''))(S]]*B*C*CDJJ4PPQRSK88!2!23!7!7888888r   c                      t           j        j        dS t          t           j        j                  } t	          d |                     d          D                       S )Nr%   c              3   4   K   | ]}t          |          V  d S r   r   r   s     r   r#   z*_get_torch_cuda_version.<locals>.<genexpr>/  (      99AQ999999r   r$   )r   r   r-   strr'   r(   )cuda_versions    r   _get_torch_cuda_versionr   +  sO    }!vu})**L99!3!3C!8!8999999r   c                     t           rt          j        j        dS t	          t          j        j                  } |                     dd          d         } t          d |                     d          D                       S )Nr%   -rF   maxsplitr   c              3   4   K   | ]}t          |          V  d S r   r   r   s     r   r#   z*_get_torch_rocm_version.<locals>.<genexpr>6  r   r   r$   r   r   r   r&   r   r(   r'   )rocm_versions    r   _get_torch_rocm_versionr  1  ss     U].6vu}())L%%cA%66q9L99!3!3C!8!8999999r   c                      t            S r   )r   r   r   r   !_check_cusparse_generic_availabler  8  s    r   c                     t           sdS t          j        j        sdS t	          t          j        j                  } |                     dd          d         } t          d |                     d          D                       }|d u p|dk      S )	NFr   rF   r   r   c              3   4   K   | ]}t          |          V  d S r   r   r   s     r   r#   z5_check_hipsparse_generic_available.<locals>.<genexpr>C  s(      GG!s1vvGGGGGGr   r$   )r*   rF   r  )r  rocm_version_tuples     r   "_check_hipsparse_generic_availabler	  ;  s     u= uu}())L%%cA%66q9LGG|/A/A#/F/FGGGGG"d*I.@6.IJJr   r-   c                 f   t           j                            t           j                            dd          t           j                            dd                                        |           }t           j                            t           j                            dd          t           j                            dd                                        |           }t          j                    5  t          |                                |                                          D ]\  }}|                    |           	 d d d            n# 1 swxY w Y   ddi}||	                    |            ||                                fi |} ||                                fi |}	||||	fS )Nr8   r   lrr   )
r   nn
SequentialLineartono_gradr   r   copy_r   )
r   optimizer_ctoroptimizer_kwargsmod_controlmod_scalingcsr   opt_controlopt_scalings
             r   !_create_scaling_models_optimizersr  K  s    (%%ehooa&;&;UX__QPQ=R=RSSVV^dVeeK(%%ehooa&;&;UX__QPQ=R=RSSVV^dVeeK	  ..00+2H2H2J2JKK 	 	DAqGGAJJJJ	               C[F#&''' .!7!7!9!9DDVDDK .!7!7!9!9DDVDDK[+==s   4AEEEc           	         t          j        d||           t          j        d||           ft          j        d||           t          j        d||           ft          j        d||           t          j        d||           ft          j        d||           t          j        d||           fg}t           j                                                            |           }d}t          | ||          |||fz   S )N)r8   r8   )r   r   r	   )r   r  r  )r   r   r  MSELossr  r  )r   r   r  r  dataloss_fn	skip_iters          r   _create_scaling_caser   ]  s   [uV<<<ek&X]fl>m>m>mn[uV<<<ek&X]fl>m>m>mn[uV<<<ek&X]fl>m>m>mn[uV<<<ek&X]fl>m>m>mnpD
 h  ##F++GI,nGW  	w	"# #r   c                 <    t           s| nt          j        |           S r   )IS_SM89unittestexpectedFailurefuncs    r   xfailIfSM89r'  l  s    B44H$<T$B$BBr   c                 <    t           s| nt          j        |           S r   )r   r#  r$  r%  s    r   xfailIfSM100OrLaterr)  o      #G44)A$)G)GGr   c                 <    t           s| nt          j        |           S r   )SM120OrLaterr#  r$  r%  s    r   xfailIfSM120OrLaterr-  r  r*  r   c                 J    t           s	t          s| nt          j        |           S r   )r   	IS_JETSONr#  r$  r%  s    r   xfailIfDistributedNotSupportedr0  u  s!     RIR44H4LT4R4RRr   )r   )r   T)Z__doc__r   r   
torch.cuda$torch.testing._internal.common_utilsr   r   r   r   r   r   r   
contextlibrT   r#  r-   is_initialized"CUDA_ALREADY_INITIALIZED_ON_IMPORTr   TEST_MULTIGPUr   r   r   rv   r   SM53OrLaterSM60OrLaterSM70OrLaterSM75OrLaterrp   SM89OrLaterr   r   r,  rH   r/  r"  IS_SM90r   rZ   ra   rd   rr   rt   rw   rz   bool__annotations__r|   r~   r   r   r   r   r   r   r   r   r   r   r   
numba.cudanumbar.   TEST_NUMBA_CUDA	Exceptionr   r   contextmanagerr   r   r   r   r   r   r   r  r  r	  TEST_CUSPARSE_GENERICTEST_HIPSPARSE_GENERICoptimSGDr  floatr   r'  r)  r-  r0  r   r   r   <module>rJ     s   F E E          u u u u u u u u u u u u u u u u      				  &+Z%>%>%@%@ " <ej55771<(1;lel8$$$t y**++JJwwxxJWXXYY wxxyyghhiighhiighhiighhiighhiighhiighhiiwjjkkwjjkk
' @ @ A AG}}~~	
'dd
e
e
'dd
e
e7ffgg= = =N N N: : :    R R R +2'2f2f*g*g !4 g g g,3G4l4l,m,m #T m m m*1'2f2f*g*g !4 g g g*1' 3V 3V +W +W !4 W W W &/%E~3E d E E E&w'H'HII  I I I         #*'*V*V"W"W 4 W W W%g&N&NOO t O O O+273h3h+i+i "D i i i")'*L*L"M"M 4 M M M-4W5l5l-m-m $d m m m *1133   


 O # * * * F F F ' ' ' '* F F FX   F  9 9 9: : :: : :  	K 	K 	K :9;; ;;==  .4EKOfj > > > >$ !'ek%+/lp # # # #C C CH H HH H HS S S * +z((*****+ +**s   ,I
 
	II