
    )`i                     (   d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	 d dl
Z
ddlmZmZ da edg d	          Z ed
g dd           ZdefdZdefdZ ej                    Zd Z ej                    Zd Ze j        defd            Zd Ze G d d                      ZdedefdZd6de
j        dededefd Z d6de
j        dededefd!Z!e
j"        #                    d"d#$          	 d6de
j        dededede
j        f
d%            Z$e
j"        %                    d"          d6d&            Z&d'edefd(Z'd'edefd)Z(d'ed*ee         defd+Z)de	e         fd,Z*	 d7de	ed.f         fd/Z+d8d1Z,d2eee                  fd3Z-d0a.defd4Z/defd5Z0dS )9    N)	dataclass)Enum)DictListTuple   )ceil_divround_upFAuxStreamType)	Attention	MoeSharedMoeChunkingOverlap	EventType)Mainr   r   r   )startenablec                 
    | a d S Nis_torch_compiling_flagr   s    n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/fused_moe/utils.pyset_torch_compilingr      s    $    returnc                      t           S r   r    r   r   is_torch_compilingr      s    ""r   c                      t           S r   )_global_attrsr   r   r   get_global_attrsr!   %   s    r   c                  .    t          t          dd           S Nattrs)getattr_model_extra_attrsr   r   r   get_model_extra_attrsr'   ,   s    %w555r   r$   c              #      K   t          t          dd           }| t          _        	 d V  |t          _        d S # |t          _        w xY wr#   )r%   r&   r$   )r$   	old_attrss     r   model_extra_attrsr*   0   sS      *GT::I$-#,   9 ,,,,s	   8 Ac                       fd}|S )Nc                       fd}|S )Nc                     t           |                     5   | g|R i |cd d d            S # 1 swxY w Y   d S r   )r*   )selfargskwargsfunc	get_attrss      r   wrapperz:with_model_extra_attrs.<locals>.decorator.<locals>.wrapper<   s    "99T??33 3 3tD242226223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3s   377r   )r1   r3   r2   s   ` r   	decoratorz)with_model_extra_attrs.<locals>.decorator;   s)    	3 	3 	3 	3 	3 	3 r   r   )r2   r4   s   ` r   with_model_extra_attrsr5   :   s$         r   c                   \    e Zd ZU ej        ed<   ej        ed<   dZeed<   ed             Z	dS )Fp4QuantizedTensor
fp4_tensorscaling_factorTis_sf_swizzledc                     | j         j        S r   )r8   shape)r.   s    r   r<   zFp4QuantizedTensor.shapeK   s    $$r   N)
__name__
__module____qualname__torchTensor__annotations__r:   boolpropertyr<   r   r   r   r7   r7   E   s[         L   ND% % X% % %r   r7   rowcolc                 J    t          | d          }t          |d          }||fS N      )r
   )rE   rF   
padded_row
padded_cols       r   compute_swizzled_sf_shaperM   P   s+    #s##J#q!!Jz!!r      sfrowscolsscaling_vector_sizec                     t          ||          }|                     d||          } t          j        j                            |           S )a  Swizzle FP4 scaling factors using C++ torch op implementation
    Args:
        sf: [b, rows, cols_sf] or [rows, cols_sf]. The original unswizzled scaling factors.
        rows: rows of the original unquantized tensor
        cols_sf: ceil_div(cols, scaling_vector_size) where cols is the number of columns of the original unquantized tensor
        scaling_vector_size: the size of the scaling vector
    Returns:
        [b * round_up(rows, 128) * round_up(cols_sf, 4), ] 1D swizzled scaling factors, possibly with rows and cols padded.
    )r	   viewr@   opstrtllmblock_scale_interleaverO   rP   rQ   rR   sf_colss        r   
swizzle_sfr[   V   sB     t011G	T7	#	#B9222666r   c                     t          ||          }|                     d||          } t          j        j                            |                               d|          S )a^  Swizzle FP4 scaling factors using C++ torch op implementation
    Args:
        sf: The (padded and) swizzled scaling factors.
        rows: rows of the original unquantized tensor
        cols: cols of the original unquantized tensor
        scaling_vector_size: the size of the scaling vector
    Returns:
        2D unswizzled scaling factors
    rT   )r	   rU   r@   rV   rW   block_scale_interleave_reverserY   s        r   unswizzle_sfr^   e   sR     t011G	T7	#	#B9::2>>CCBPPPr   ztrtllm::reswizzle_sfr   )mutates_argsc                    t          ||          }t          ||          \  }}||z  }|                                 ||z  z  dk    sJ |                                 ||z  z  }|                     |||          }	t	          |	|||          }
||z  }|
                    |||          }
|
ddd|d|f                                         }|                    ||          }t          ||||          S )a  Reswizzle FP4 scaling factors using C++ torch op implementation.
       It unswizzles the scaling factors in each partition first, then concatenates them together, and finally swizzles them back.
    Args:
        sf: The (padded and) swizzled scaling factors.
        rows: rows of the original unquantized tensor
        cols: cols of the original unquantized tensor
        scaling_vector_size: the size of the scaling vector
    Returns:
        1D reswizzled scaling factors
    r   N)r	   rM   numelrU   r^   
contiguousr[   )rO   rP   rQ   rR   rZ   padded_rowspadded_sf_colspadded_colsnum_partitionssf_reshapedsf_unswizzled
total_rowssf_concatenateds                r   reswizzle_sfrk   t   s    t011G";D'"J"JK #66K88::~56!;;;;XXZZK.$@AN''.+~FFK ![+/B M
  $&J!&&~{NSSM#	5D5(7(jll  &**:w??O oz49LMMMr   c                     t          ||          }t          ||          \  }}|                                 ||z  z  }||z  }t          |d          t          |d          z  }	|                     |	          S rH   )r	   rM   ra   r
   	new_empty)
rO   rP   rQ   rR   rZ   rc   rd   rf   ri   szs
             r   _ro      sw    t011G";D'"J"JKXXZZK.$@AN$&J	*c	"	"XdA%6%6	6B<<r   xc                     | dk     rdS | dz
  }||dz	  z  }||dz	  z  }||dz	  z  }||dz	  z  }||dz	  z  }||dz	  z  }|dz   S )N   r   rJ      rN       r   )rp   ns     r   next_positive_power_of_2rv      sr    1uuq
 	
AAaKAaKAaKAaKAbLAbLAq5Lr   c                 :    t          |           }|| k    r|S |dz  S Nr   )rv   )rp   nexts     r   last_positive_power_of_2rz      s'    #A&&Dqyy19r   bucketsc                 p    t          t          t          |           |d                   |d                   S )Nr   rT   )minmaxrv   )rp   r{   s     r   nearest_in_bucketsr      s-    s+A..
;;WR[IIIr   c                     t          |           } g }| }|dk    r |                    |           |dz  }|dk     t          |          S )Nrr   r   )rv   appendtuple)max_num_tokensnum_token_bucketsms      r   !get_power_of_2_num_tokens_bucketsr      s\    -n==NA
q&&  ###	a q&& "###r   rr   .c                     t          |           } g }| }||k    r |                    |           |dz  }||k     t          |          S rx   )rz   r   r   )r   min_num_tokensr   r   s       r   &get_last_power_of_2_num_tokens_bucketsr      sb     .n==NA
~

  ###	a ~

 "###r   Tc                    d}t          t          |           dz
            D ]}|| |         z  }d | D             }|dxx         dz  cc<   |r*t          |d          t          | d         |z  d          z  n|| d         |z  z  }||fS )Nrr   c                     g | ]}|S r   r   ).0is     r   
<listcomp>z!get_fp4_shape.<locals>.<listcomp>   s    +++!A+++r   rT   r   rI   rJ   )rangelenr
   )input_shapesf_vec_sizeis_swizzled_layoutr   r   output_shapescale_shapes          r   get_fp4_shaper      s    	A3{##a'((  	[^++{+++L 	2C8KO{$BAFFFF+b/[01 
 $$r   input_shapesc                 @    t          | d         d          \  }}|dz  S )z1Calculate the dimensions of the fp4 scale tensor.r   rN   )r   r   )r   )r   	out_shaper   s      r   fp4_scale_infer_shaper      s'    *<?KKKI{?r   c                 
    | a d S r   _enable_piecewise_cuda_graphr   s    r   set_piecewise_cuda_graph_flagr      s    #)   r   c                      t           S r   r   r   r   r   get_piecewise_cuda_graph_flagr      s    ''r   )rN   )rr   )T)1
contextlib	threadingdataclassesr   enumr   typingr   r   r   r@   utilsr	   r
   r   r   r   rC   r   r   localr    r!   r&   r'   contextmanagerr*   r5   r7   intrM   rA   r[   r^   library	custom_oprk   register_fakero   rv   rz   r   r   r   r   r   r   r   r   r   r   r   <module>r      s5           ! ! ! ! ! !       $ $ $ $ $ $ $ $ $ $  & & & & & & & & 444  D<<<
  	% % % % %
#D # # # #
  	!!   %Y_&& 6 6 6 -T - - - -   % % % % % % % %"3 "S " " " "7 75< 7s 7# 7C 7 7 7 7Q QU\ Q QC Qc Q Q Q Q /bAAGI$N $N$N$N'*$NAD$N
\$N $N $N BA$NN 344   54     "     J# JS	 Jc J J J J$s $ $ $ $ $%	$ 	$
38_	$ 	$ 	$ 	$% % % % T#Y      $ *$ * * * *
(t ( ( ( ( ( (r   