
    Pi5                         U d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	  ej
                    Zej        ed<    G d de j                  Z G d d	e j                  Ze G d
 d                      Z e            Zej        Zej        Z ed           G d d                      Z ed           G d d                      Z G d de j                  Z ed           G d d                      ZdS )    N)	dataclass)OptionalUnion)is_MI300loggerc                       e Zd ZdZdZd ZdS )ScalingTypedynamicdisabledc                 F    | t           j        u rdS | t           j        u sJ dS )Ndyndis)r	   DYNAMICDISABLEDselfs    i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/float8/config.py	short_strzScalingType.short_str   s.    ;&&&5;/////5    N)__name__
__module____qualname__r   r   r    r   r   r	   r	      s/        G H    r   r	   c                        e Zd ZdZdZdZd ZdS )ScalingGranularityzM
    Defines the granularity of scaling strategies for casting to float8
    
tensorwiseaxiswisec                 F    | t           j        u rdS | t           j        u sJ dS )Ntenaxs)r   
TENSORWISEAXISWISEr   s    r   r   zScalingGranularity.short_str,   s0    %0005-666665r   N)r   r   r   __doc__r!   r"   r   r   r   r   r   r   !   s;         
 J H    r   r   c                   4    e Zd ZdZej        Zej        Zd Z	dS )Float8TypeConfigz
    Configuration for selecting the preferred float8 type pair, either e4m3fn/e5m2 or e4m3fnuz/e5m2fnuz.

    Currently, ROCm supports 1. fnuz variants in MI300. 2. OCP F8 variants in MI350/Navi4.
    c                     t           j        j        rPt           j                                        r4t                      r(t           j        | _        t           j        | _	        d S d S d S d S )N)
torchversionhipcudais_availabler   float8_e4m3fnuz
e4m3_dtypefloat8_e5m2fnuz
e5m2_dtyper   s    r   __post_init__zFloat8TypeConfig.__post_init__B   se    = 	4!8!8!:!: 	4xzz 	4#3DO#3DOOO	4 	4 	4 	4 	4 	4r   N)
r   r   r   r#   r'   float8_e4m3fnr-   float8_e5m2r/   r0   r   r   r   r%   r%   4   sA          $J "J4 4 4 4 4r   r%   T)frozenc                   t    e Zd ZU dZej        Zeed<   ej	        Z
eed<   dZeej                 ed<   d Zd ZdS )
CastConfigzC
    Configuration for maybe casting a single tensor to float8
    scaling_typescaling_granularityNtarget_dtypec                     t           dt          di| j                 }| j                                         d| j                                         d| S )Ne4m3e5m2_)r-   r/   r8   r6   r   r7   )r   dtypes     r   r   zCastConfig.short_strX   sS    VZ89JK#--//``$2J2T2T2V2V``Y^```r   c                     | j         t          j        u r| j        t          j        u s
J d            | j        &| j        j        r| j        j        dk    sJ d            d S d S )NzGonly dynamic scaling type is supported for axiswise scaling granularity   z)must specify a 8-bit floating-point dtype)	r7   r   r"   r6   r	   r   r8   is_floating_pointitemsizer   s    r   r0   zCastConfig.__post_init__\   s    #'9'BBB$(;;;;Y <;;  (/ )484E4NRS4S4S4S6 5T4S
((4S4Sr   )r   r   r   r#   r	   r   r6   __annotations__r   r!   r7   r8   r   r'   r=   r   r0   r   r   r   r5   r5   N   s           !, 3L+333.@.K+KKK*.L(5;'...a a a7 7 7 7 7r   r5   c                   "    e Zd ZU dZdZeed<   dS )Float8GemmConfigz*
    Configuration for a float8 gemm.
    Fuse_fast_accumN)r   r   r   r#   rE   boolrB   r   r   r   rD   rD   f   s.           !ND     r   rD   c                       e Zd ZdZdZdZdS )Float8LinearRecipeNamer   rowwiserowwise_with_gw_hpN)r   r   r   r!   ROWWISEROWWISE_WITH_GW_HPr   r   r   rH   rH   r   s$        J
 G .r   rH   c                      e Zd ZU dZ e            Zeed<   dZee         ed<    e            Z	eed<   dZ
ee         ed<    e            Zeed<   dZee         ed<    ed	
          Zeed<    e            Zeed<    e            Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zedeeef         dd fd            ZdS )Float8LinearConfigz]
    Configuration for converting a `torch.nn.Linear` module to float8
    for training.
    cast_config_inputN!cast_config_input_for_grad_weightcast_config_weight!cast_config_weight_for_grad_inputcast_config_grad_output'cast_config_grad_output_for_grad_weightT)rE   gemm_config_outputgemm_config_grad_inputgemm_config_grad_weightFenable_fsdp_float8_all_gatherpad_inner_dimemulate!force_recompute_fp8_weight_in_bwdround_scales_to_power_of_2c                    | j         !t                              | d| j                   | j        !t                              | d| j                   | j        !t                              | d| j                   | j        j        t          j
        k    r| j        rJ d| j        j                     | j        }| j        }| j        }| j         }| j        }| j        }||df||df||dffD ]?\  }}}	|j        t          j        u }
|j        t          j        u }|
|k    sJ d|	             @||d	t          f||d
t          f||dt           ffD ]j\  }}}}|j        t                              |d|           |j        t                              |d|           |j        |j        k    sJ | d            k| j        rt&                              d           d S d S )NrP   rR   rT   zPenable_fsdp_float8_all_gather only supports tensorwise scaling granularity, got output
grad_inputgrad_weightz#incompatible operand precision for inputweightgrad_outputr8   z< must be cast to the same dtype in both matmuls it's used inz`config.force_recompute_fp8_weight_in_bwd` is deprecated and will be removed in a future release. Please see https://github.com/pytorch/ao/issues/2251 for more details.)rP   object__setattr__rO   rR   rQ   rT   rS   r7   r   r!   rX   r6   r	   r   r-   r/   r8   r[   r   warning)r   cc_icc_wcc_gocc_i_gwcc_w_gicc_go_gwcc1cc2	gemm_nameis_disabled_1is_disabled_2operand_namedefault_dtypes                 r   r0   z Float8LinearConfig.__post_init__   s    1994;Q   1994;R   7?9,   "6:L:WWW9   Qcgcz  dO  Q  Q 9
 %&,88?
 4"G\*h.$
 		 		Ci
  ,0DDM,0DDM M111AiAA 2111
 7GZ07Hj1HmZ86
 	 	1ClM '""3FFF'""3FFF#s'7777]]] 8777 1 	NN {    	 	r   recipe_namereturnc           	         t          |           t          k    r4d t          D             }| |v sJ d|  d|             t          |           } | t          j        u rt	                      S | t          j        u rst          t          j        t                    }t          t          j        t                    }t          t          j        t                    }t	          |||d          S | t          j
        u rt          t          j                  }t          t          j                  }t          t          j        t                    }t          t          j                  }t          t          j                  }t          t          j        t          	          }t	          ||||||d
          S t          d|            )z
        Input: `Float8LinearRecipeName` value, or a string representing a `Float8LinearRecipeName` value
        Output: a `Float8LinearConfig` configured to implement the specified recipe
        c                     g | ]	}|j         
S r   )value).0ns     r   
<listcomp>z7Float8LinearConfig.from_recipe_name.<locals>.<listcomp>  s    CCCq17CCCr   zrecipe_name z not in valid names )r7   r8   T)rO   rQ   rS   r\   )r7   )r6   )r6   r8   )rO   rQ   rS   rP   rR   rT   r\   zunknown recipe_name )typestrrH   r!   rN   rK   r5   r   r"   r-   rL   r	   r   AssertionError)rt   valid_namesrg   rh   ri   rk   rj   rl   s           r   from_recipe_namez#Float8LinearConfig.from_recipe_name  s    ##CC,BCCCK+---M{MMMM .-- 1==K0;;;%'''2:::$6$?j  D $6$?j  D $6$?j  E &"&#'(-+/    2EEE2D2MNNND2D2MNNND $6$?j  E !5G5RSSSG !k.BCCCG!(1
  H &"&#'(-29298@+/    !!E!E!EFFFr   )r   r   r   r#   r5   rO   rB   rP   r   rQ   rR   rS   rT   rD   rU   rV   rW   rX   rF   rY   rZ   r[   r\   r0   staticmethodr   rH   r}   r   r   r   r   rN   rN      s         * %/JLLz000>B%x
';BBB%/Z\\
111>B%x
';BBB*4*,,Z666DH+Xj-AHHH ,<+;4+P+P+P(PPP/?/?/A/A,AAA0@0@0B0B-BBB +0!4///  M4 GT
 /4%t333 (-,,,B B BH @G1367@G	@G @G @G \@G @G @Gr   rN   )enumloggingdataclassesr   typingr   r   r'   torchao.utilsr   	getLoggerr   LoggerrB   Enumr	   r   r%   type_configr-   r/   r5   rD   rH   rN   r   r   r   <module>r      s9     ! ! ! ! ! ! " " " " " " " "  " " " " " "**,, , , ,    $)          & 4 4 4 4 4 4 4 4(   #
#
 $7 7 7 7 7 7 7 7. $! ! ! ! ! ! ! !. . . . .TY . . .0 $IG IG IG IG IG IG IG IG IG IGr   