
     `iH                     `   d dl mZmZmZ  e            r
ddlZddlmZ  e            rddlmZ ddlZddl	m
Z
  ej        e          Zg dZe
d             Zd	 Zd
 Zej        dddej        dedej        fdZ G d dej                  Zd Zd Zd Zd Zd Z	 	 	 	 	 ddZ	 	 	 	 ddZdS )   )is_accelerate_availableis_torch_availablelogging    N)nn)init_empty_weights)contextmanager)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c              #     K   t                      rdd l}t          | |j                  r| j        } n%t          | t
                    r |j        |           } t          | dd           }|dk    r8|j                            |           5  d V  	 d d d            d S # 1 swxY w Y   |dk    rHt          |d          r8|j	                            |           5  d V  	 d d d            d S # 1 swxY w Y   d V  d S )Nr   typecudaxpu)
r   torch
isinstanceTensordevicestrgetattrr   hasattrr   )devr   dev_types      s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/integrations/mxfp4.py	on_devicer   3   s      c5<(( 	$*CCS!! 	$%,s##C3--v""3''                  u!6!6!!#&&                  
EEEEEs$   BB"BC))C-0C-c                     |j         j        j        } ||                     t          j                  t          j        d          \  } }| |fS )N   )axis)numerics_detailsmxfpdowncast_to_mxfp_torchtor   bfloat16uint8)wtriton_kernels_hubr   w_scales       r   quantize_to_mxfp4r%   J   sG    /@E\''U^(<(<ekPQRRRJAwg:    c                    |j         j        |j         j        |j         j        }}}|j        j        }|j        j        j        }|                    d          \  }}	 | || |          |fi |	}  | ||          |          }| |fS )zE
    Changes the layout of the tensors depending on the hardware
    r   )mx_axisdtype)tensorFP4convert_layoutwrap_torch_tensortensor_detailslayoutStridedLayout"make_default_matmul_mxfp4_w_layout)
r"   r$   r#   r,   r-   r.   r0   r1   value_layoutvalue_layout_optss
             r   swizzle_mxfp4r5   P   s    
 	!%!0!3 +C
  .5F&5<JM&,&O&OXY&O&Z&Z#L#((#666ZZHYZZAn..w77GGGg:r&   i   )r*   rows_per_chunkr*   r6   returnc                   ddl }| j        sFt          j                                        r(|                                 } |                                }|                    t          j                  dz
  }| j        dd         |j        k    s"J d| j        dd         d|j                    t          j        t          || j
                  }| j        ^ }}}|                    |          |z  }	|                     |	|          } |                    |	d          }t          j        |	|d	z  || j
                  }
t          d|	|          D ]}t          ||z   |	          }| ||         }|||         }|d
z                      t          j                  }|dz	                      t          j                  }|
||         }||         |ddddd	f<   ||         |ddddd	f<   t          j        |||           ~~~~~  |
j        g |||d	z  R  j        g |||z  d	z  R  }
~ ~~|
                    dd	                                          S )zw
    Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward
    pass of GPT_OSS.
    r   N   zblocks.shape[:-1]=z does not match scales.shape=)r*   r   r   r         )out)mathis_cudar   r   is_availabler   int32shaper+   
FP4_VALUESr   prodreshapeemptyrangeminlongldexpview	transpose
contiguous)blocksscalesr*   r6   r>   lutprefix_shapeGB
rows_totalr=   r0r1blkexpidx_loidx_hisubs                     r   convert_moe_packed_tensorsr\   d   sr    KKK > ej5577 YYu{##c)F<,,,.dcrc1B.d.dU[Ua.d.d,,,
,zv}
E
E
EC ,\1a<((1,J^^J**F^^J**F
+j!a%uV]
K
K
KCAz>22 * *n$j11RUmRUm *,,(uz**"R%j6{AAAqt!tG6{AAAqt!tGC#&&&&FCcc
3+#+
.|
.Q
.A
.
.
.
3
M\
M1q519
M
M
MC==A))+++r&   c                   B     e Zd Z fdZdej        dej        fdZ xZS )Mxfp4GptOssExpertsc           	         t                                                       |j        | _        |j        | _        |j        | _        t          j        t          j	        | j        d| j        z  | j        dz  dt          j
                  d          | _        t          j        t          j	        | j        d| j        z  | j        dz  t          j
                  d          | _        t          j        t          j	        | j        d| j        z  t          j                  d          | _        t          j        t          j	        | j        | j        | j        dz  dft          j
                  d          | _        t          j        t          j	        | j        | j        | j        dz  t          j
                  d          | _        t          j        t          j	        | j        | j        t          j                  d          | _        d| _        t'          |dd	          | _        d | _        d | _        t'          |dd	          | _        d S )
Nr          r)   Frequires_gradgZd;?swiglu_limitg      @)super__init__num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosr!   gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalphar   limitgate_up_proj_precision_configdown_proj_precision_config)selfconfig	__class__s     r   rf   zMxfp4GptOssExperts.__init__   s#   !3!'!9!-#%<K(!d.D*DdFVZ\F\^`hmhsttt$
 $
 $
  $&<K(!d.D*DdFVZ\F\didoppp$
 $
 $
  "$K(!d.D*DEMZZZjo"
 "
 "
 !#K)4+;T=SWY=Y[]^fkfqrrr!
 !
 !
 !#K($*:D<RVX<X`e`klll!
 !
 !
 !lK($*:%-PPP`e
 
 
 
V^S99
-1**.'V^S99


r&   hidden_statesr7   c                 2   t           j        j        t           j        j        t           j        j        }}}t           j        j        }t          |j                  5   | |d|d          | j        | j	        fd          }	 ||| j
        | j                            t          j                  ||| j        d |	          }
 ||
| j        | j                            t          j                  ||| j        |j                  }d d d            n# 1 swxY w Y   |S )Nswiglu)rt   ru   r   )gather_indxprecision_configgammasfused_activation)scatter_indxr   r   )r#   
matmul_ogsFnSpecsFusedActivationr}   	swiglu_fnr   r   rt   ru   gate_up_projrp   r   r   ro   rv   	down_projrs   rw   	gate_scal)rx   r{   routing_data
gather_idxscatter_idxr   r   r   r   actintermediate_cache1intermediate_cache3s               r   forwardzMxfp4GptOssExperts.forward   sb   )1)9)4 #-
 '-7	}+,, 	 	!/''(I?Q"R"RUYU_aeakTlnoppC",*!&))%-88&!%!C!$	# 	# 	# #-*##&&u}55(!%!@#-# # #	 	 	 	 	 	 	 	 	 	 	 	 	 	 	. #"s   B'DDD)__name__
__module____qualname__rf   r   r   r   __classcell__)rz   s   @r   r^   r^      sa        ": ": ": ": ":H#U\ #]b]i # # # # # # # #r&   r^   c                 T   dd l }t          j        j        t          j        j        t          j        j        t          j        j        f\  }}}}t          | j                  5  t          j
                                        }t          |j                            dd                    }d}	| j        d         }
| j        d         }||z  }||z  }|dz   |z  }|
|z  }d } || |          \  }}t          j        |d          }t          j        |d          \  }}t          j        |d|          }|                    d          }t          j        |||dz
            ||         }|                    d                              t          j                  }d	}t          j        ||k     ||          }t          j        |d
                              t          j                  }t          j        |                              t          j                  }t          j        ||k     ||	          }t          j        ||k    ||	          }t          j        ||	k    |	|          }||         }t          j        ||         |	k    |	|          } ||                                |                                          } ||                                |                                          } ||||          }|}d d d            n# 1 swxY w Y    ||||||          ||fS )Nr   
LOCAL_RANK0r:   r   c                     t          j        |  dd          d d d |f         }|                                }t          j        | |d          }||                                fS )Nr   T)dimstabler   )r   argsortrI   take_along_dimint)valsktk_indxtk_vals       r   topkz routing_torch_dist.<locals>.topk   sb    mTEq>>>qqq"1"uEGllnnG)$Q???F7;;==((r&   r   )binsmaxi  T)r   )src_indxdst_indx)osr#   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r   r   distributedget_world_sizer   environgetrB   softmaxsortgatherrE   histcrK   r   rA   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr   r~   r   	expt_datahit_expertss                                r   routing_torch_distr      sF    III 	"-".".":	EAJ[*A 
6=	!	! 3" 3"&5577
2:>>,4455<?l1o%3!O3 1H7,	) 	) 	)  $tFK88	9M)444	"'*YA">">">	<LA|<<	 %%b))	{9;K!OLLLM_`pMpqNN2&&))%+66	 K	,> >YOO	M)D999<<U[II	M),,//<<	K	,< <iWW	K 2i ?MZZ	K	] :M9UU	i(	K	) 4 E}V_``	 !j)--//IMMOOTTT"{IMMOOimmooVVV++D/;OO	!g3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3"h ;y$iPPR]_kkks    J!LLLc                 z   dd l m} |                                r,|                                rt	          | d          rt
          }nt          j        j        }|j        d         }|	                    d| j
        j                  }t          j                            || j
        j        | j
        j                  }t#          |j                  5   ||| j
        j                  \  }}}d d d            n# 1 swxY w Y   |                     ||||          }	|		                    |d| j
        j                  }	|	|fS )Nr   
_is_hookedr:   )torch.distributedr   r@   is_initializedr   r   r#   r   rB   rE   router
hidden_dimr   
functionallinearweightbiasr   r   top_kexperts)
rx   r{   distr   
batch_sizerouter_logitsr   r   r   
routed_outs
             r   mlp_forwardr   '  s|   $$$$$$ 5t2244 5|9T9T 5$$,4$Q'J!))"dk.DEEMM((8JDKL\]]M	='	(	( Z Z07t{GX0Y0Y-j+Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z m\:{SSJ##JDK4JKKJ}$$s   C55C9<C9c                 l    d                     |           t          fd|D                       sdS dS )N.c              3   t   K   | ]2}t          j        | d           pt          j        |           V  3dS )z\.N)rematch).0keycurrent_key_name_strs     r   	<genexpr>z(should_convert_module.<locals>.<genexpr>=  s[        dgC233_rx3J^7_7_     r&   TF)joinany)current_key_namepatternsr   s     @r   should_convert_moduler   ;  sZ    88$455    ks      t5r&   c                    ddl m} |                    d          }|                    d          }|                    d          }	|                    d          }
|                    d          }|                    d          }d	D ]@}||v r8| ||||||	|
||          }| d
}| d}t          | |                    dd          d         |           t          | |          rt          | |          rt          t          | |          t          | |                    }|dk    r<t          j	        
                                rt          j	                                         t          | |t          j                            |                    |                               t          | |           t          | |           Bd S )Nr   shard_and_distribute_modulemodelempty_paramcasting_dtypeto_contiguousr   device_mesh)r   r   _blocks_scalesr   r   cpu)integrations.tensor_parallelr   r   setattrrsplitr   r\   r   r   r   r@   empty_cacher   rk   r   delattr)module
param_nameparam_valuetarget_devicedq_param_namekwargsr   r   r   r   r   r   r   projblocks_attrscales_attrdequantizeds                    r   
dequantizer  D  s   JJJJJJJJwE**]++KJJ//MJJ//M::fD**]++K- - -:&99!!!	 	 "***K!***KFJ--c155a8+FFFv{++ -0L0L -89U9UW^_egrWsWstt E))ej.E.E.G.G)J**,,,eh&8&89V9V&W&WXXX,,,,,,-- -r&   c                 J   |j         j        |j         j        |j         j        }}}ddlm}	 |                    d          }
|                    d          }|                    d          }|                    d          }|                    d          }|                    d          }d	|v r4|                    d
          d                             d          d         }d|v r4|                    d
          d                             d          d         }| |	|
|||||||           nJt          | |	                    d
d          d         t          j                            |d                     | d}| d}t          | |          }t          | |          }|j        j        dk    r|j        j        dk    r|                    d          }|dk    r |                    || j        dz  d          }n|                    |d| j        dz            }t          |d|          dk    rd}|                    |                                          }|                    |                                          }t+          |          5  t-          |                    dd          |                    dd          |          \  }}ddd           n# 1 swxY w Y   |dk    r*t          j        || j        | j        dz  g          |_        n&t          j        || j        | j        g          |_        t          | ||           t          | | d || | |                                           t7          | |           t7          | |           ~dS dS dS )zq
    This transforms the weights obtained using `convert_gpt_oss.py` to load them into `Mxfp4GptOssExperts`.
    r   r   r   r   r   r   r   r   rN   r   r:   r   r   rO   r   Nr   Frb   metar   r   r   r   _precision_config)rhs_data)weight_scaleflex_ctx)r   PrecisionConfigFlexCtx
InFlexDatar   r   r   splitr   r   r   r   rk   r   r   r   sizerE   ri   r   rM   r   r5   rL   Sizerj   rB   r   )r   r   r   r   r#   r   r  r  r  r   r   r   r   r   r   r   r  r  r  rN   rO   local_expertstriton_weight_tensorr  s                           r   load_and_swizzle_mxfp4r  g  s   
 	%5%-%0 )WO
 KJJJJJJJwE**]++KJJ//MJJ//M::fD**]++K:$$R(..y99!<:$$R(..y99!<##;ZW[]h	
 	
 	
 	
 	
))#q11!4eh6H6Hdi6H6j6jkkk"""K"""KV[))FV[))F}V##(:f(D(DA>!!^^M63Ka3OQSTTFF^^M2v7OST7TUUF=&-88EAA"M=))4466=))4466}%% 	 	1>  R((&*:*:2r*B*BDV2 2. ,	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 >!!).]FDVX^XpstXt4u)v)v &&).]FD\^d^p4q)r)r & 	2333&&&OQ[Q[Q]Q]@^@^@^___	
 	
 	
 	$$$$$$FFA $#(D(Ds   =KK"KFc           	         |g }|                                  D ])\  }}|                    |           t          ||          s|                    d           A|j        j        dk    rF|j        s?t                      5  t          |          | j	        |<   d}d d d            n# 1 swxY w Y   |j        j        dk    r#|j        sddl
m}  |t          |          |_        t          t          |                                                    dk    rt#          ||||||          \  }	}|                    d           +| |fS )Nr:   GptOssExpertsT	GptOssMLPr   )
MethodType)has_been_replacedry   )named_childrenappendr   poprz   r   r  r   r^   _modulestypesr  r   r   lenlistchildren_replace_with_mxfp4_linear)
r   modules_to_not_convertr   quantization_configr  ry   namer   r  _s
             r   r#  r#    s    ,,.. ! !f%%%$%57MNN 	  $$$$77@S@^7#%% ) )'9&'A'At$$(!) ) ) ) ) ) ) ) ) ) ) ) ) ) ) $33<O<Z3(((((('ZV<<FNtFOO%%&&''!++#=& #"3$ $ $ A  	R    ###s   ?B%%B)	,B)	c                 (   |j         r| S ddlm}  |d          a|dgn|}|j        |                    |j                   t          t          |                    }t          | ||||          \  } }|st          
                    d           | S )Nr   )
get_kernelz kernels-community/triton_kernelslm_head)ry   zYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r  kernelsr)  r#   r$  extendr!  setr#  loggerwarning)r   r$  r   r%  ry   r)  r  s          r   replace_with_mxfp4_linearr0    s     % L&&&&&& (Z(JKK,B,Ji[[Pf1=%%&9&PQQQ!#&<"="=>>9     E  
	
 	
 	
 Lr&   )NNNFN)NNNN) utilsr   r   r   r   r   
accelerater   r   
contextlibr	   
get_loggerr   r.  rC   r   r%   r5   r    r*   r   r   r\   Moduler^   r   r   r   r  r  r#  r0   r&   r   <module>r7     s   I H H H H H H H H H  LLL .------ 				 % % % % % % 
	H	%	%  
( 
 
 
,    0 &3, 3, 3, ;	3,
 3, \3, 3, 3, 3,lD# D# D# D# D# D# D# D#RAl Al AlH% % %(   -  -  -F@ @ @J  "$ "$ "$ "$N  " " " " " "r&   