
    .`i=                        d dl Z d dlZd dlmZmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddl	mZ dd	lmZ e G d
 d                      Ze G d d                      Ze G d d                      Ze G d d                      Z G d de j        j                  Zd Z G d de j        j                  Zd Zd ZdZd Zd Zd Zd Zd#dZd  Zd$d!Z d%d"Z!dS )&    N)	dataclassfield   )_combined_routing_compute)_combined_routing_memset)_routing_clear_bitmatrix)_expt_data_memset)_expt_data_compute)is_hipc                   <    e Zd ZU dZej        ed<   ej        ed<   dS )
GatherIndxzG
    Indices for an operation that performs:
    Y = X[src_idx, :]
    src_indxdst_indxN__name__
__module____qualname____doc__torchTensor__annotations__     {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/third_party/triton_kernels/routing.pyr   r      :          
 llr   r   c                   <    e Zd ZU dZej        ed<   ej        ed<   dS )ScatterIndxzG
    Indices for an operation that performs:
    Y[dst_idx, :] = X
    r   r   Nr   r   r   r   r   r      r   r   r   c                       e Zd ZU ej        ed<   ej        ed<   eeej        f         ed<   eeej        f         ed<   d ZdS )ExptDatahisttoken_offs_rawtoken_offs_padblock_pid_mapc                 ~   | j         | j         j        t          j        k    sJ | j        | j        j        t          j        k    sJ | j        3| j                                        D ]}|j        t          j        k    sJ | j        3| j                                        D ]}|j        t          j        k    sJ d S d S N)r    dtyper   int32r!   r"   valuesr#   )selfvs     r   __post_init__zExptData.__post_init__7   s    9 9?ek1111*&,;;;;*(//11 . .w%+-----)'..00 . .w%+----- *). .r   N)	r   r   r   r   r   r   dictintr+   r   r   r   r   r   "   sy          , L    el*++++ U\)****
. 
. 
. 
. 
.r   r   c                       e Zd ZU  e            Zej        ed<    e            Zej        ed<    e            Z	e
ed<    e            Ze
ed<   dZeed<    ed          Ze
ed<   d	 ZdS )
RoutingData	gate_scal	expt_histn_expts_totn_expts_actN	expt_data)defaultexpected_tokens_per_exptc                     || j         k    r|S t          j        t          || j         z
  dz   d          |          | j         z   dz
  S )Nr   r   )r2   tritoncdivmax)r)   n_rowsblock_ms      r   n_blockszRoutingData.n_blocksQ   sM    T%%%M;s6D,<#<q#@!DDgNNQUQaadeeer   )r   r   r   r   r0   r   r   r   r1   r2   r-   r3   r4   r   r6   r=   r   r   r   r/   r/   D   s         #eggIu|%%%#eggIu|%%%uwwKuwwKIx
 %*E$$7$7$7c777f f f f fr   r/   c                   :    e Zd Zed             Zed             ZdS )
SortTokensc                    d}d}d}t           j        }|j        }	|j        }
|j        \  }}|j        \  }}||z  }|                    |          \  }}|d |         }|j        t          j        k    sJ t          j        |t          j        |	          }t          j        |dz  t          j        |	          }|d |         }||d          }t          j        ||
|	          }t          |||          \
  }}}}}}}}}}  ||dz  |          |z   dz   }! |||          }"t          ||!z   f         ||dz  d||||j        d	         |||j        d	         |                    d	          |                    d          ||                    d	          |||| |d|
           |}#t          ||"z   f         ||||||#|#                    d	          |#                    d          |||||||                    d	          ||                    d	          || ||           || _        || _        || _        |                     |           |||||||fS )N       i   )partials_block_sizer&   device   r   r   )SIZESBLOCK_ABLOCK_NBLOCK_M)r8   r9   rE   r&   shapesumr   r'   empty_compute_expt_data_internalr   strider   n_tokens_rawn_tokens_padr3   save_for_backward)$ctx	expt_scal	expt_indxr2   	bitmatrixHIST_BLOCK_MINDX_OFFS_BLOCK_MMEMSET_BLOCKr9   rE   r&   rQ   _rR   r3   n_gates_padr    partial_hist	expt_offscombined_indx	topk_indx	gate_indxr0   token_offs_combinedr!   r"   r#   blocks1ablocks2aMEMSET_BLOCK_AHIST2_BLOCK_Mblock_m_log2_startblock_m_numblocks1bblocks2b	indx_offss$                                       r   forwardzSortTokens.forward_   s   {!#/a$-O!k"[0&]]|]LLlL[L!zU[((((K5;vNNN	K!O5;vVVV!,;,/	!+,,/	K5HHH	 b}+{b, b,  	_^^]HV^`np}  @R  T_ 4a66DqH4l33 (X"5!89;?Bdtz!}k<q!<#6#6q#9#9<;N;Nq;Q;Q!4!;!;A!>!>mk>!2	
 	
 	
 	
 !	!8h#6"9:y)y)Y-=-=a-@-@)BRBRSTBUBU|+.."7"7":":M=K_K_`aKbKb]H	
 	
 	
 ('%i(((Y	9nnVcccr   c                 p    | j         \  }||         }|                    | j        | j                  }|d d d fS r%   )saved_tensorsreshaperR   r3   )	rT   _0_1_2
dgate_scal_3_4_5ra   s	            r   backwardzSortTokens.backward   s?    )	*
''(8#/JJ
4t++r   N)r   r   r   staticmethodrl   rw   r   r   r   r?   r?   ]   sL        6d 6d \6dp , , \, , ,r   r?   c                 <    t                               | |||          S r%   )r?   apply)rU   rV   r2   rW   s       r   sort_tokensr{      s    Iy+yIIIr   c                   $    e Zd Zed             ZdS )PruneRoutingc                 v   ddl m } |j        d         }||z  dk    sJ t          |f         |j        j        |j        j                            d          |j        j                            d          |j        j        j        d         ||z  d            ||||          \  }}||z  }||j        d<   |||fS )Nr   )
compactionr   rB   )rJ   rG   )r   rL   r   storagedatarP   )rT   rU   rV   rW   r2   simulated_epr   rR   s           r   rl   zPruneRouting.forward   s    ****** q)\)Q.... ,!12""))!,,"))!,,"(+<'	
 	
 	
 	
  *z)Y	JJ	9!\1)	)Y..r   N)r   r   r   rx   rl   r   r   r   r}   r}      s-        / / \/ / /r   r}   c                 >    t                               | ||||          S r%   )r}   rz   )rU   rV   rW   r2   r   s        r   prune_routingr      s    iI{LYYYr   c                 h    | dk    r| | dz
  z  dk    s
J d            |                                  dz
  S )Nr   r   zx must be a power of two)
bit_length)xs    r   log2_power_of_twor      s?    q55a1q5ka''')C'''<<>>Ar      c           
      N   dd}| j         }|}t          j        t                      rdnd}|t          z
  }||k    r|}n|dz
  ||z
  dz
  dt          z  z  z
  }fd}t
          j        }	t          j        |dz    ||dz             f|	|          }
|
d         d |dz            }|
dd          }t          j        | ||          f|	|          }t          j        |          z  }|d d d |dz   f         }|d d d |f         }||z   dz   }||z  }|
||||||t          |f
S )	NrB   	      r   rF   c                 "     |           z  S r%   r   )r   rZ   r9   s    r   <lambda>z-_compute_expt_data_internal.<locals>.<lambda>   s    DDL))L8 r   rD   r   )	rE   r8   r9   r   rg   r   r'   rN   numel)r1   r2   n_gatesrf   rE   block_m_log2_endrh   max_n_tilespadr&   rb   r!   r"   r#   memset_gridblocks1blocks2rZ   r9   s                    @@r   rO   rO      s   LMFK;D"HH+qq!"%77K+!Ao+*?!*CK]H])]^
8
8
8
8
8CKE+{QK!O8L8L&MUZcijjj(+,<[1_,<=N(,NKcc+.>.> ?uU[\\\M+m,,<K#AAA'7a'7$78N!!!!\k\/2MK'!+GK'GwX_amo|  Q  S^  ^  ^r   c                      t            j        d         z   } fdt          t          t           |                    D               S )Nr   c                 6    i | ]\  }}d |z  |ddf         S )rF   Nr   ).0ijr   s      r   
<dictcomp>z%_unpack_into_dict.<locals>.<dictcomp>   s/    ZZZ41aAqAAAwZZZr   )rg   rL   	enumeraterange)r   r   s   ` r   _unpack_into_dictr      sF    )AGAJ6ZZZZ9U3EGW-X-X#Y#YZZZAHr   c                    | t          d d d d           S t          | ||          \
  }}}}}}}	}
}}t          |f         | |||                    d          ||||	d	  	         t	          |f         | ||                    d          ||                    d          |||
d	  	         t          |          }t          |          }t          | |||          S )Nr   r   )rH   BLOCK	num_warps)r   rO   r	   rP   r
   r   )r1   r2   r   rb   r!   r"   r#   r   r   rZ   rf   rg   rh   s                r   compute_expt_datar      s&   dD$/// Zu;Z) Z)  WQXZfhu  xJ  LW wk";077::+\    {#>>#8#8#;#;]ML`L`abLcLc+]   
 '~66N%m44MI~~}MMMr   c                    t          ||||           \  }}}}}	}
}t          |
          }
t          |          }t          ||	|
|          }t          ||          }t	          ||          }t          |||||          ||fS )Nr   r   )r{   r   r   r   r   r/   )rW   rU   rV   r2   r3   r    r`   ra   r0   r!   r"   r#   r4   gather_indxscatter_indxs                  r   routing_from_bitmatrixr     s    [f9k9\6 \6XD)Y	>>=&~66N%m44M~~}MMI i)DDDK	IFFFLy$[)LLk[gggr   Fc                     ddl m } |rt          j        | d          }  || || ||          \  }}}| j        d         |z  }	|dk    r"t	          |||| j        d         |          \  }}}t          ||||	|          S )Nr   )topkrG   dim)apply_softmaxy_indxr;   )r   r   softmaxrL   r   r   )
logitsr3   sm_firstrV   r   r;   r   rU   rW   r2   s
             r   routingr   $  s     /v2...&*d6;=Ei`f'h 'h 'h#Iy),r"l2Ka*7	9iY_YefhYikw*x*x'	9i!)Y	;P[\\\r   c                 d   | j         }t          j        | d          }t          j        t          j        d|          |f          }|                                }g d}t                      r|                    d           ||k    r|}n|dz
  ||z
  dz
  t          |          z  z
  }t                      }t                      }|D ]V}	| |	z   dz
  |	z  }
t          j        |
d          ||	<   t          j        t          j        d|          ||	         f          ||	<   ||	                                         ||	<   t          j
        |t          j        |           ||	<   t          j        ||          }t          j        ||          d d d f         |dz  d d d f         z   }||	         d d	d f         |d d d f         z   }|d d d f         |
d d d f         k     }||	                             ||         f|                                |                    Xt          | |||          S )
Nr   r   r   )rE   )   rA   @         rD   r   rG   )rE   r   cumsumcatzerosr-   r   appendminr,   onesr'   arange
index_put_r   )r    r2   r   rE   r!   block_msr   r"   r#   r<   n_tilescolmap_valsmap_idxsmasks                  r   compute_expt_data_torchr   7  sM   [F\$A...NYAf = = =~NOON#''))N   Hxx + "Ao+*?!*CH)UVVVNFFM T T'>A%'1"',wA">">">w"')U[6-J-J-JN[bLc,d"e"ew"0"9"="="?"?w"'*[TZ"["["[![g l;v666<F;;;AAAtGDr	SWYZYZYZSZG[[!'*3B395D!!!GD47|gaaag..g))8D>*<hllnnT>RSSSSD..-HHHr   c                     |r|}n$t          j        |  dd          d d d |f         }|                                }t          j        | |d          }|                                }||fS )Nr   T)r   stabler   )r   argsortlongtake_along_dimr-   )valskrV   has_user_provided_indxtk_indxtk_vals         r   
topk_torchr   _  sv     B-1T:::111bqb5AllnnG!$Q777FkkmmG7?r   c                    |d u}| j         d         |z  }|| d |d d f         } | j         \  }}|rt          j        | d          } t          | |||          \  }	}|st          j        |	d          }	|s/t          j        |d          \  }}
t          j        |	d|
          }	|	                    d          }	|                    d                              t          j                  }t          j	        |d          }t          j	        |d          }|	|         }t          j
        |||dz
                                            }t          |                                |                                	          }t          |                                |                                	          }t          |||          }t          |||||          ||fS )
Nr   rG   r   )r   r   T)r   )binsr:   r   )rL   r   r   r   sortgatherro   tor'   r   histcr-   r   r   r   r/   )r   r3   r   rV   r;   r   r\   r[   r2   rU   sort_indicesr`   ra   r0   r    r   r   r4   s                     r   routing_torchr   k  s   &d2,q/K/K
#\NA{ /v2...%fk9]stttIy 5M)444	! ="'*YA">">">	<LA|<<	!!"%%I!!"%%((55Ii555Ii555I)$I;y{aHHHLLNNDimmoo	PPPK	)--//RRRL'k;GGIy$[)LLk[gggr   )FNr   N)F)FNN)"r   r8   dataclassesr   r    routing_details._routing_computer   r   r   routing_details._expt_datar	   r
   target_infor   r   r   r   r/   autogradFunctionr?   r{   r}   r   r   rg   rO   r   r   r   r   r   r   r   r   r   r   <module>r      s     ( ( ( ( ( ( ( ( G G G G G G F F F F F F F F F F F F 9 9 9 9 9 9 : : : : : :                       . . . . . . . .B f f f f f f f f0@, @, @, @, @,( @, @, @,FJ J J/ / / / /5>* / / /,Z Z Z  
  ^ ^ ^D  N N N<
h 
h 
h] ] ] ]&%I %I %IP	 	 	 	h h h h h hr   