
    .`iS"                        d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZ dd
lmZ  ee          Zdej        dej        fdZdej        dededeej        ej        f         fdZdej        dej        dej        dej        dej        dej        dej        fdZ	 d-dej        j        dej        dej        dej        dz  fdZd Z	 d-dej        dej        dej        dz  dej        fdZ	 d-dej        dej        dej        dz  dej        fd Z 	 d-dej        j        dej        dej        dej        dz  dej        f
d!Z! ed"ee #           d$ed%ed&ej"        de#fd'Z$dej        j        d(e#ddfd)Z%	 d-dej        j        dej        dej        dej        dz  fd*Z&ded+ej        f         fd,Z'dS ).z!Utility methods for model layers.    )CallableN)_custom_ops)envs)rocm_aiter_ops)init_logger)CpuArchEnumcurrent_platform)get_cu_count)direct_register_custom_opwreturnc                     | j         }|d         }| dd |dz  f         }| d|dz  d f         }t          j        ||fd          }|                    |          }|S )N.   dim)shapetorchstackreshape)r   r   Nfirstsecondstacked
w_shuffleds          t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/utils.pyshuffle_weightr      so     GEb	Ac8Q!V8mEsAFHH}Fk5&/r222G''J    tokens
vocab_sizenum_seqsc                     t          j        ||dz   ft           j        | j                  }|                    d| t          j        |                      |d d d |f         }|dk    }||fS )N   )dtypedevicer   )r   zeroslongr%   scatter_add_	ones_like)r   r    r!   
bin_countsmasks        r   get_token_bin_counts_and_maskr,   )   s|     	:>"%*V]  J Avuv'>'>???AAA{
{N+J>Dtr   logitsprompt_tokens_tensoroutput_tokens_tensorpresence_penaltiesfrequency_penaltiesrepetition_penaltiesc                    | j         \  }}t          |||          \  }}	t          |||          \  }
}ddlm}  || |	||           | |                    d          |
z  z  } | |                    d          |z  z  } | S )a  
    Applies penalties in place to the logits tensor
    logits : The input logits tensor of shape [num_seqs, vocab_size]
    prompt_tokens_tensor: A tensor containing the prompt tokens. The prompts
        are padded to the maximum prompt length within the batch using
        `vocab_size` as the padding value. The value `vocab_size` is used
        for padding because it does not correspond to any valid token ID
        in the vocabulary.
    output_tokens_tensor: The output tokens tensor.
    presence_penalties: The presence penalties of shape (num_seqs, )
    frequency_penalties: The frequency penalties of shape (num_seqs, )
    repetition_penalties: The repetition penalties of shape (num_seqs, )
    r   )apply_repetition_penaltiesr#   r   )r   r,   vllm._custom_opsr4   	unsqueeze)r-   r.   r/   r0   r1   r2   r!   r    _prompt_maskoutput_bin_countsoutput_maskr4   s                r   apply_penaltiesr;   :   s    * "<Hj2j( NA{ &Cj(& &"{
 <;;;;;v{KAUVVV !+++225FFFF
 **q*11K??FMr   layerxweightbiasc                 N    t           j        j                            |||          S Nr   nn
functionallinearr<   r=   r>   r?   s       r   default_unquantized_gemmrG   c   s!     8%%a666r   c                    t          j                    r-t          j                    s|t          j        t          j        fvrdS | dk    r|dk    rdS |dk    r|dk    p/|dk    o|dk    p#|dk    o|dk    p|dk    o|dk    p|dk    o|dk    S )	NFi      i   i@  i      i  )r   is_triton_gemm_enabledr	   is_fp8_fnuzr   float16bfloat16)nmkr$   s       r   use_aiter_triton_gemmrR   l   s    133 ')) 777u 	4xxAGGu	
d	 qDy 	$I#!t)	$H"d	$ H"d	$ I"!s(r   c                    ddl m}m} |                                 |                     d          z  }|j        d         }|j        d         }dd l}t          j        o{ |            oq| j	        t          j        t          j        fv oR|dk    oL|dk    oF|dk    o@|                    |dz            |                    |dz            z  t                      k     }	|	rut                      }
|                     d|                     d                    }t!          j        |||
|          } |j        g | j        d d         |j        d         R  S t%          |||| j	                  rddlm}  || ||          S t          j        o1 |            o'| j	        t          j        t          j        fv o|d	z  dk    }|d
ur&t          j        j                            | ||          S |                     d|                     d                    }|d	k    r\d|cxk     rdk    rOn nLt                      }
t!          j        |||
|          } |j        g | j        d d         |j        d         R  S |dz  dk    rK|dk    rE|dk    r?|=t!          j        ||d          } |j        g | j        d d         |j        d         R  S t          j        j                            | ||          S )Nr   )on_gfx9	on_gfx950r   r#      rJ   rI   )gemm_a16w16   T   i    )vllm.platforms.rocmrT   rU   numelsizer   mathr   VLLM_ROCM_USE_SKINNY_GEMMr$   r   rM   rN   ceilr
   r   ops
wvSplitKrcrR   aiter.ops.triton.gemm_a16w16rW   rC   rD   rE   wvSplitKLLMM1)r=   r>   r?   rT   rU   rO   rP   rQ   r]   use_skinny_reduce_countingcu_countx_viewoutrW   
use_skinnys                  r   rocm_unquantized_gemm_implrj      s    76666666			AFF2JJAQAQAKKK 	& 	
IKK	
Gu~66	
 G HSHCH 		!c'""TYYq2v%6%66G  " ;>>2qvvbzz**nVVXt<<s{:AGCRCL:&,q/::::Q1ag.. ,<<<<<<{1fd+++ 	& 	GII	Gu~66	 EQJ	  x"))!VT:::YYr166"::&&F1uuQ!>>l668T::s{:AGCRCL:&,q/::::	
Q!Q199i**s{:AGCRCL:&,q/::::8%%a666r   c                 f    |                      g | j        d d         |j        d         R           S )Nr   r   )	new_emptyr   r=   r>   r?   s      r   rocm_unquantized_gemm_fakern      s4     ;;7"7v|A77888r   c                 N    t           j        j                            |||          S rA   )r   r`   vllmrocm_unquantized_gemmrF   s       r   rq   rq      s      9>//64@@@r   rq   )op_nameop_func	fake_implrO   rQ   r$   c                     t           j        j                                        o+|t           j        t           j        fv o|dz  dk    o| dz  dk    S )N    r   rV   )r   _C_cpu_is_amx_tile_supportedrN   int8)rO   rQ   r$   s      r   check_cpu_sgl_kernelr{      sR    ,,.. 	u~uz22	FaK	 FaK	r   remove_weightc                   	 | j         j        rt          j        j        j        | _        d S | j                                         \  }}| j         j        }t          j
        rt          |||          rt          j        j                            | j                   	t          | dd           %| j                            t          j                  nd 	fd| _        |r8t          j                            t          j        d          d          | _         d S t          j        rt-          j                    t0          j        k    r	 | j         }t          j        |                                d          fd| _        |r8t          j                            t          j        d          d          | _         d S # t8          $ r'}t:                              d|            Y d }~nd }~ww xY wd	 | _        d S )
Nr?   c                 Z    t           j        j                            | |nd d          S )NT)r   r`   rw   weight_packed_linear)r=   r>   r?   bias_f32packed_weights      r   <lambda>z/dispatch_cpu_unquantized_gemm.<locals>.<lambda>   s-    59<3T3T}$*:hhd4
 4
 r   r   F)requires_gradrv   c                 0    t          j        | |          S rA   )r`   	onednn_mm)r=   r>   r?   handlers      r   r   z/dispatch_cpu_unquantized_gemm.<locals>.<lambda>   s    s}WaQU7V7V r   zEFailed to create oneDNN linear, fallback to torch linear. Exception: c                 N    t           j        j                            | ||          S rA   rB   rm   s      r   r   z/dispatch_cpu_unquantized_gemm.<locals>.<lambda>  s"    ux/B/I/I	640 0 r   )r>   is_metar   rC   rD   rE   
cpu_linearr\   r$   r   VLLM_CPU_SGL_KERNELr{   r`   rw   convert_weight_packedgetattrr?   tofloat32	Parameterempty_supports_onednnr	   get_cpu_architecturer   POWERPCcreate_onednn_mmtRuntimeErrorloggerwarning_once)
r<   r|   r   Kr$   origin_weighter   r   r   s
          @@@r   dispatch_cpu_unquantized_gemmr      s   
 |  8.5<DAqLE $8Au$E$E 	::5<HH5&$''3z}}U]33HHH
 
 
 
 
  	S 8--ek!nnE-RREL133{7JJJ	!LM*=??+<+<bAAGVVVVE W$x11%+a..PU1VVF 	 	 	# # #       	 Es   A2F< <
G-G((G-c                 0    |                      |||          S rA   )r   rF   s       r   cpu_unquantized_gemmr     s     Avt,,,r   .c                  x    t          j                    rt          S t          j                    rt          S t
          S rA   )r	   is_rocmrq   is_cpur   rG    r   r   dispatch_unquantized_gemmr     s6    !! ($$		 	"	" (##''r   rA   )(__doc__collections.abcr   r   rp   r   r`   r   vllm._aiter_opsr   vllm.loggerr   vllm.platformsr   r	   vllm.utils.platform_utilsr
   vllm.utils.torch_utilsr   __name__r   Tensorr   inttupler,   r;   rC   ModulerG   rR   rj   rn   rq   r$   boolr{   r   r   r   r   r   r   <module>r      s   ( ' $ $ $ $ $ $  # # # # # #       * * * * * * # # # # # # 8 8 8 8 8 8 8 8 2 2 2 2 2 2 < < < < < <	X		el u|    *L  5<%&	   "&L&,&  ,& 	&
 &  ,& \& & & &Z !%	7 78?7|7 L7 ,
	7 7 7 7  , HL47 47|47"\47161D47
\47 47 47 47p HL9 9|9"\9161D9
\9 9 9 9 !%	A A8?A|A LA ,
	A
 \A A A A  #&(   C C      ,8?,, 
, , , ,f !%	- -8?-|- L- ,
	- - - -(8C,=#> ( ( ( ( ( (r   