
    Pi3                         d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZ ddlmZmZmZ ddlmZ ej        j         G d	 d
ej        j                              Z G d dej        j                  ZdS )zD
A simple module swap UX for a float8 version of `torch.nn.Linear`.
    )OptionalN)Float8LinearConfigScalingType)tensor_already_casted_to_fp8)get_maybe_axiswise_dimhp_tensor_to_float8_dynamic)GemmInputRoleLinearMMConfigScaledMMConfig)!WeightWithDynamicFloat8CastTensorc            	       d    e Zd ZdZedej        dej        dedefd            Z	ed             Z
dS )	matmul_with_hp_or_float8_argsa  
    Like torch.matmul, but with the arguments in either high precision or float8.
    * if the arguments are in high precision, they are cast to float8 according
      to the specified config
    * if the arguments are in float8, we assume the cast honored the config
    input_hpweight_hp_tlinear_mm_configconfigc                    |                      ||           || _        || _        |}t          |          r|}nl|j        j        t          j        u r|}nQt          ||j        j	        |t          j        |j        j        t          d|j        j                  |j                  }t          |          r|}nl|j        j        t          j        u r|}nQt          ||j        j	        |t          j        |j        j        t          d|j        j                  |j                  }|j        }|                    d|d                   }	t'          j        |	|          }
 |
j        g |d d         |
j        d         R  }
|
S )Ngemm_input_rolescaling_granularityaxiswise_dimround_scales_to_power_of_2r   )save_for_backwardr   r   r   cast_config_inputscaling_typer   DISABLEDr   target_dtyper	   INPUTr   r   r   cast_config_weightWEIGHTshapereshapetorchmm)ctxr   r   r   r   cinput_maybe_fp8weight_maybe_fp8_t
orig_shapeinput_maybe_fp8_reshapedres_bitss              p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/float8/float8_linear.pyforwardz%matmul_with_hp_or_float8_args.forward%   s    	h444/
'11 	&OO -1EEE&OO9#0  - 3$%$7$K3+?  ,-+G
 
 
O (44 	!,!.+2FFF!,!<$1  - 4$%$8$L3q+?  ,-+G
" 
" 
" %*
#2#:#:2z"~#N#N 846HII#8#IZ_IhnR6HIII    c                    | j         \  }}| j        }|j        }|                    d|d                   }t	          |          r|}nq|j        j        t          j        u r|}nVt          ||j        j
        | j        t          j        |j        j        t          d|j        j                  |j                  }t	          |          r|}nq|j        j        t          j        u r|}nVt          ||j        j
        | j        t          j        |j        j        t          d|j        j                  |j                  }t'          j        ||                                          }	 |	j        g |d d         |	j        d         R  }	|j        }
|                    d|
d                   }t	          |          r|}nq|j        j        t          j        u r|}nVt          ||j        j
        | j        t          j        |j        j        t          d|j        j                  |j                  }t	          |          r|}nq|j        j        t          j        u r|}nVt          ||j        j
        | j        t          j        |j        j        t          d|j        j                  |j                  }t'          j        |                                |          }d}|	|                                g|R S )Nr   r   r   )NN)saved_tensorsr   r"   r#   r   cast_config_grad_outputr   r   r   r   r   r   r	   GRAD_OUTPUTr   r   r   !cast_config_weight_for_grad_inputr!   r$   r%   t'cast_config_grad_output_for_grad_weight!cast_config_input_for_grad_weightr   )r&   grad_outputr   r   r'   grad_output_orig_shapegrad_output_reshaped#grad_output_reshaped_maybe_fp8_dim0weight_t_maybe_fp8_dim0
grad_inputinput_hp_orig_shapeinput_hp_reshaped#grad_output_reshaped_maybe_fp8_dim1input_reshaped_maybe_fp8_dim1grad_weightempty_gradss                   r-   backwardz&matmul_with_hp_or_float8_args.backward]   sN    # 1+J "-!2*2227Mb7QRR ((<== 	2F//&3{7KKK2F//2M$)6$ - 9$%$=$Q31E  ,-+G
3 
3 
3/ (44 	&1##0=AUUU&1##&A3@$ - 4$%$G$[3;O  ,-+G
' 
' 
'# X/#%%''
 

 (Z' 
#CRC(
*4*:2*>
 
 

 'n$,,R1DR1HII ((<== 	2F//5B#$ $ 3G//2M$9F$ - 9$%$M$a3q@T  ,-+G
3 
3 
3/ ((9:: 	,=))0=AUUU,=)),G!3@$ - 3$%$G$[3q:N  ,-+G
- 
- 
-) h/1133)
 

 !;==??8[888r/   N)__name__
__module____qualname____doc__staticmethodr$   Tensorr
   r   r.   rD    r/   r-   r   r      s          5,5 \5 )	5
 #5 5 5 \5n n9 n9 \n9 n9 n9r/   r   c                   |     e Zd ZdZ fdZdej        dej        fdZ fdZe		 d
de
e         fd	            Z xZS )Float8Lineara   
    Note: this is **not** a public API and is only intended to be used
    inside of this repository. Please file an issue if you would benefit
    from this being a public API.

    A wrapper around a `torch.nn.Linear` module which does fp8 compute.
    c           
         |                     d          } t                      j        |i | |j        j        | _        |j        j        | _        |j        j        | _	        || _
        t          t          |j        | j
        j        j        d| j
        j                  t          |j        | j
        j        j        d| j
        j                  t          |j        | j
        j        j        d| j
        j                            | _        dS )zv
        Additional arguments on top of `torch.nn.Linear`'s arguments:
        * `config`: Float8LinearConfig
        r   FN)popsuper__init__r   r   scaling_type_inputr    scaling_type_weightr2   scaling_type_grad_outputr   r
   r   emulategemm_config_outputuse_fast_accumpad_inner_dimgemm_config_grad_inputgemm_config_grad_weightr   )selfargskwargsr   	__class__s       r-   rQ   zFloat8Linear.__init__   s     H%%$)&))) #)":"G#)#<#I (.(F(S% ..=)	  2A)	  3B)	 !!
 !
r/   inputreturnc                 J   t          j                    r(t          j                    }|                    |          }t                              || j                                        | j        | j	                  }| j
        "|| j
                            |j                  z   }|S N)r$   is_autocast_enabledget_autocast_gpu_dtypetor   applyweightr5   r   r   biasdtype)r[   r_   autocast_dtypeoutputs       r-   r.   zFloat8Linear.forward   s     $&& 	- #9;;NHH^,,E.44KMMOO!K	
 
 9 dill6<888Fr/   c                    | j         }d|j                                         }d|j                                         }d|j                                         }|||g}|j        |j        k    r/|                    d|j                                                    |j        |j        k    r/|                    d|j                                                    |j        |j        k    r/|                    d|j                                                    d	                    |          }t                                                       d| d	}|S )
Nzi:zw:zgo:zi_gw:zw_gi:zgo_gw:,z, cast_configs=")r   r   	short_strr    r2   r7   appendr4   r6   joinrP   
extra_repr)	r[   r'   cicwcgopartscast_config_strsr^   s	           r-   rr   zFloat8Linear.extra_repr  s\   K3!%//11334!&002244;A-7799;;R.!2EEELLR!D!N!N!P!PRRSSS.!2FFFLLR!D!N!N!P!PRRSSS48QQQLLPBLLNNPP   ((5//ww!!##FFOFFFr/   Nr   c                    |t                      }t          j        d          5   | |j        |j        d|          }ddd           n# 1 swxY w Y   |j        |_        |j        |_        |j        rr|j        j	        t          j        u sJ t          j                            t          |j        |j        |j        j        j                  |j        j                  |_        |S )z
        Create an nn.Linear with fp8 compute from a regular nn.Linear

        Args:
            mod (torch.nn.Linear): nn.Linear to convert
            config (Optional[Float8LinearConfig]): configuration for conversion to float8
        NmetaF)rh   r   )requires_grad)r   r$   devicein_featuresout_featuresrg   rh   enable_fsdp_float8_all_gatherr    r   r   DYNAMICnn	Parameterr   r   r   r   r{   )clsmodr   new_mods       r-   
from_floatzFloat8Linear.from_float%  s-    >'))F\&!! 	 	c 	  G	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 x / 		,9[=PPPPP"X//1N,N5B 
 &n: 0  GN s   AAArb   )rE   rF   rG   rH   rQ   r$   rJ   r.   rr   classmethodr   r   r   __classcell__)r^   s   @r-   rM   rM      s         %
 %
 %
 %
 %
NU\ el    (    $  04) ) +,) ) ) [) ) ) ) )r/   rM   )rH   typingr   r$   torchao.float8.configr   r    torchao.float8.distributed_utilsr   #torchao.float8.float8_scaling_utilsr   r   %torchao.float8.float8_training_tensorr	   r
   r   torchao.float8.fsdp_utilsr   _dynamoallow_in_graphautogradFunctionr   r   LinearrM   rK   r/   r-   <module>r      sQ           A A A A A A A A I I I I I I                
 H G G G G G o9 o9 o9 o9 o9EN$; o9 o9 o9d@ @ @ @ @58? @ @ @ @ @r/   