
    Pi'              
          d dl Z d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mc mZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ  ej                    de	j        ddfd	            Zej        j        j        j        ej        j        j         j        ej        j        j!        j"        ej        j        j#        j        ej        j        j$        j        ej        j        j%        j        ej        j        j&        j        ej        j        j'        j        ej        j        j(        j"        ej        j        j)        j        h
Z* G d
 dej"                  Z+ej,        -                    e+g           dS )    N)AnyListOptionalSetTuple)suggest_memory_format)hp_tensor_to_float8_dynamic)Float8TrainingTensorGemmInputRoleLinearMMConfighp_tensor_and_scale_to_float8)EPSmodulereturnc                 F   ddl m ddlm t          j                            d           fd|                                 D             }d |D             }d |D             }|sdS |\  }t	          j        |t          j
        	          }t	          j        |          }t	          j        |t                    }|j        }|                    t          j                  }t	          j        |          j        |z  }|t          j        u r7t	          j        |t	          j        t          j                  j        
          }|                                                    t          j                  }	t-          |          D ]\  }
}|	|
         |j        j        _        dS )aI  
    Calculate scale dynamically for all float8 parameters.
    This should be run after the optimizer step. It performs a single all-reduce to compute the
    scales for all float8 weights.
    Example usage:
        model(input).sum().backward()
        optim.step()
        precompute_float8_dynamic_scale_for_fsdp(model)
    r   DTensor)Float8Linearz7torchao.float8.precompute_float8_dynamic_scale_for_fsdpc                     g | ]H}t          |          t          |j                  't          |j        j        t                    F|IS  )
isinstanceweight_local_tensor!WeightWithDynamicFloat8CastTensor).0mr   r   s     m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/float8/fsdp_utils.py
<listcomp>z<precompute_float8_dynamic_scale_for_fsdp.<locals>.<listcomp>.   si     * * *a&&* qx))	*
 qx-/PQQ*	* * *    c                     g | ]	}|j         
S r   )r   r   float8_linears     r   r   z<precompute_float8_dynamic_scale_for_fsdp.<locals>.<listcomp>5   s    WWW}m2WWWr   c                 0    h | ]}|j         j        j        S r   )configcast_config_weighttarget_dtyper!   s     r   	<setcomp>z;precompute_float8_dynamic_scale_for_fsdp.<locals>.<setcomp>6   s1     ' ' ' 	/<' ' 'r   N)ord)max)torch.distributed._tensorr   torchao.float8.float8_linearr   torch_C_log_api_usage_oncemodules_foreach_normmathinfstackclampr   dtypetofloat64finfor)   float16to_localfloat32	enumerater   r   _precomputed_scale)r   float8_linearsweightstarget_dtypesr&   max_weightsamax_tensororigin_dtypescale_tensorlocal_scale_tensorir"   r   r   s               @@r   (precompute_float8_dynamic_scale_for_fsdprG      s    211111999999	H  A  * * * * *!!* * *N XWWWWG' '+' ' 'M
  #O\ %g48<<<K+k**K +k3//K $L..//K;|,,0;>Lu}$${<U[5O5O5STTT%..0033EMBB%n55 V V=@RST@U*==V Vr   c                   B   e Zd Ze	 ddej        dedej        deej                 fd            Z		 ddej        dedej        deej                 fdZ
edd            Zd	 Zed
             Zd Zd Zdddeej        df         dedej        deej                 fdZdS )r   Ntensorlinear_mm_configr5   precomputed_scalec                 &   t           j                            | |                                |                                |                                t          |          |j        |j        |j	        |
                                |j        
  
        S )N)stridesstorage_offsetmemory_formatr5   layoutdevice
pin_memoryrequires_grad)r,   Tensor_make_wrapper_subclasssizestriderN   r   r5   rP   rQ   	is_pinnedrS   )clsrI   rJ   r5   rK   s        r   __new__z)WeightWithDynamicFloat8CastTensor.__new__   sz     |22KKMMMMOO!0022/77,=='')) . 3 
 
 	
r   c                 >    || _         || _        || _        || _        d S N)_tensor_linear_mm_config_dtyper=   )selfrI   rJ   r5   rK   s        r   __init__z*WeightWithDynamicFloat8CastTensor.__init__   s*     !1 #4r   c                 n   |t           j        j        j        j        k    r2t          |d         j        |d         j        |d         j                  S d d fd}t          j
        t
          |||pi f          \  }} ||i |}|t          vr|S t          j
        t           j        fd|          S )Nr   c                 n    | j         n| j         k    sJ | j        n| j        k    sJ | j        S r\   )r^   r_   r]   )tr5   	mm_configs    r   unwrapzDWeightWithDynamicFloat8CastTensor.__torch_dispatch__.<locals>.unwrap   sP     /		*i7777}x5((((9r   c                 &    t          |           S r\   )r   )xr5   re   s    r   <lambda>zFWeightWithDynamicFloat8CastTensor.__torch_dispatch__.<locals>.<lambda>   s    79eLL r   )r,   opsatendetachdefaultr   r]   r^   r_   pytreetree_map_only_ops_to_preserve_subclassrT   )	rY   functypesargskwargsrf   outr5   re   s	          @@r   __torch_dispatch__z4WeightWithDynamicFloat8CastTensor.__torch_dispatch__   s    59>(0004Qa!:DGN   /3	'+	 	 	 	 	 	 +-vfl7K
 
f dD#F##000J#LLLLLL
 
 	
r   c                 b    dg}| j         r|                    d           || j        | j        dfS )Nr]   r=   )re   r5   )r=   appendr^   r_   )r`   tensorss     r   __tensor_flatten__z4WeightWithDynamicFloat8CastTensor.__tensor_flatten__   s?    +" 	1NN/000d&<t{SSSSr   c           
      h    t          | d         |d         |d         t          | dd                     S )Nr]   re   r5   r=   )r   getattr)inner_tensorsflatten_spec
outer_sizeouter_strides       r   __tensor_unflatten__z6WeightWithDynamicFloat8CastTensor.__tensor_unflatten__   s<    0)$%!M#7>>	
 
 	
r   c                 8    d| j          d| j         d| j         dS )Nz)WeightWithDynamicFloat8CastTensor(tensor=z, linear_mm_config=z, dtype=))r]   r^   r_   )r`   s    r   __repr__z*WeightWithDynamicFloat8CastTensor.__repr__   sP     K4<  K  K\`\r  K  K  }A  }H  K  K  K  	Kr   c                     | j         2t          | j        | j         | j        | j        t
          j                  }n.t          | j        | j        | j        dt
          j        |          }|j        f|j	        ffS )NT)reduce_amaxgemm_input_roledevice_mesh)
r=   r   r]   r_   r^   r   WEIGHTr	   _data_scale)r`   meshfloat8_training_tensors      r   fsdp_pre_all_gatherz5WeightWithDynamicFloat8CastTensor.fsdp_pre_all_gather   s    ".%B'&$& &"" &A&  - 4 & & &" ',.1G1N0PPPr   )ru   all_gather_outputs.metadataparam_dtyperu   c                >   |\  }|\  }|nddl m} t          |t                    r||_        nIt          ||          r't          |j        t                    r||j        _        nt          d|           d S t          |||| j        t          j	                  |ffS )Nr   r   z[out must be a Float8TrainingTensor or DTensor(_local_tensor=Float8TrainingTensor), but got )r   )
r*   r   r   r
   r   r   RuntimeErrorr^   r   r   )r`   r   r   r   ru   datascaler   s           r   fsdp_post_all_gatherz6WeightWithDynamicFloat8CastTensor.fsdp_post_all_gather   s     %?999999#344 	"

C)) j!#7/ /  ,1!(("wruww   F#")0
 
 
 7 	r   r\   )__name__
__module____qualname__staticmethodr,   rT   r   r5   r   rZ   ra   classmethodrv   rz   r   r   r   r   r   r   r   r   r   r   r      s        59
 

 )
 {	

 $EL1
 
 
 \
2 594 44 )4 {	4
 $EL14 4 4 4 
 
 
 [
BT T T 
 
 \
K K KQ Q Q4 '+  !%,"34  [	 el#     r   r   ).r1   typingr   r   r   r   r   r,   torch.nnnntorch.utils._pytreeutils_pytreern   torch._prims_commonr   #torchao.float8.float8_scaling_utilsr	   %torchao.float8.float8_training_tensorr
   r   r   r   torchao.float8.float8_utilsr   no_gradModulerG   rj   rk   
empty_likerm   	new_zerosslicerT   copy_view
as_strided_to_copy_pin_memorysplitclonerp   r   serializationadd_safe_globalsr   r   r   <module>r      s    2 2 2 2 2 2 2 2 2 2 2 2 2 2        $ $ $ $ $ $ $ $ $ 5 5 5 5 5 5                 , + + + + + 3VRY 3V4 3V 3V 3V 3Vt 
IN%	IN$	IN	IN 	IN	IN%	IN#	IN&	IN	IN  `K K K K K K K K^   $ $&G%H I I I I Ir   