
    PiJ                        d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlmZ d dlm	Z
 d dlmZ dQdZe G d	 d
                      Z eddddd           eddd           eddd           eddd           eddddddd           eddddd           eddddd d!           ed"d"d#           ed$d$d%           edddddd d&d'(           ed)ddddd d&d'd*+	  	         ed)dddddd&d'd*+	  	         ed)d,d-ddd.d&d'd*+	  	         ed)d/d0dd1dd&d'd*d*2
  
        d3Zd4Z G d5 d6ej                  Zd d7lmZ  G d8 d9ej                  Z G d: d;ej                  Z G d< d=ej                  Z G d> d?ej                  Z G d@ dAej                  Z G dB dCej                  ZdDej        fdEZdFej        d4fdGedHedIedJej        dKe dLefdMZ!dNedOedLefdPZ"dS )R    N)	dataclass)Optional)Tensor)
functional)find_multiple   c                 H   |                                  dk    r$t          d|                                             t          j        d| j        d         | j                                      t          j                  }|                     d| j        d                   |fS )N   z,Expected input to be of dim 1 or 2, but got r   device)	dim
ValueErrortorcharangeshaper   toint32view)inpsmax_new_tokens	input_poss      o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/_models/llama/model.pyprepare_inputs_for_modelr      s    xxzzA~~T

TTUUU Q
2t{CCCFFu{SSIIIb$*R.))955    c                       e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed	<   d
Z
eed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zedefd            Zd
S )	ModelArgsi   
block_size }  
vocab_size    n_layern_head   r   Nintermediate_sizer   n_local_heads@   head_dim'  	rope_baseh㈵>norm_epsFuse_scaled_ropetie_word_embeddingsc                     | j         dk    r| j        | _         | j        4d| j        z  }t	          d|z  dz            }t          |d          | _        | j        | j        z  | _        d S )Nr      r
         )r&   r#   r%   r   intr   r(   )self
hidden_dimn_hiddens      r   __post_init__zModelArgs.__post_init__,   sj    ##!%D!)TXJ1z>A-..H%28S%A%AD"DK/r   namec                 ^   t           v r | di t                    S fdt           D             }t          |          dk    rR|                    t          d           t          |d                   t          |d                   k    s
J              | di t           |d                  S )Nc                 |    g | ]8}|t                                                    v s|t                    v 6|9S  )strupper).0configr8   s     r   
<listcomp>z'ModelArgs.from_name.<locals>.<listcomp>:   sN     
 
 
T****fD		.A.A .A.A.Ar   r   T)keyreverser   r;   )transformer_configslensort)clsr8   r?   s    ` r   	from_namezModelArgs.from_name5   s    &&&333,T2333
 
 
 
-
 
 
 v;;??KKCK...vay>>S^^333 433 s44(3444r   )__name__
__module____qualname__r   r3   __annotations__r    r"   r#   r   r%   r&   r(   r*   floatr,   r-   boolr.   r7   classmethodr<   rG   r;   r   r   r   r      s        JJGSFCCOOO!s!!!M3HcIuHe!OT!!! %%%%0 0 0 5S 5 5 5 [5 5 5r   r   i @  r   r!   r$   i@B )r   r    r"   r   r*   )r"   r#   r   (   i   <   4   i   0   r'          i V  )r"   r#   r   r    r&   r%   r*   P   i p  )r"   r#   r   r&   r%   i 8  )r"   r#   r&   r   r%   r       i      i   i  i  )r   r"   r#   r&   r   r%   r    r*   i   T)	r   r"   r#   r&   r   r%   r    r*   r-   ~      i         i   )
r   r"   r#   r&   r   r%   r    r*   r-   r.   )zCodeLlama-7b-Python-hf7B13B30B34B70Bz
Mistral-7B
stories15Mstories110Mz
Llama-3-8BzLlama-3.1-8BzLlama-3.1-70BzLlama-3.1-405BzLlama-3.2-3BFc                   2     e Zd Zej        f fd	Zd Z xZS )KVCachec                     t                                                       ||||f}|                     dt          j        ||                     |                     dt          j        ||                     d S )Nk_cachedtypev_cache)super__init__register_bufferr   zeros)r4   max_batch_sizemax_seq_lengthn_headsr(   rh   cache_shape	__class__s          r   rk   zKVCache.__init__   sy     	%wIYKu(M(M(MNNNYKu(M(M(MNNNNNr   c                 Z   |j         d         |j         d         k    sJ t          r]t          j        j                            | j        d d |g|          }t          j        j                            | j        d d |g|          }n&| j        }| j        }||d d d d |f<   ||d d d d |f<   ||fS )Nr   r
   )r   use_index_put_for_kv_cacher   opsaten
index_put_rf   ri   )r4   r   k_valv_valk_outv_outs         r   updatezKVCache.update   s    q!U[^3333% 	+IN--tT95u E IN--tT95u EE LELE%*E!!!QQQ	/"%*E!!!QQQ	/"e|r   )rH   rI   rJ   r   bfloat16rk   r|   __classcell__rr   s   @r   rd   rd      sW        GL~O O O O O O      r   rd   )%_quantize_activation_per_token_absmaxc                   H     e Zd Zej        f fd	Zd Zed             Z xZ	S )AffineQuantizedKVCachec                    t                                                       ||||f}|||df}|                     dt          j        |t          j                             |                     dt          j        |t          j                             |                     dt          j        ||                     |                     dt          j        ||                     d S )Nr   rf   rg   ri   k_cache_scalev_cache_scale)rj   rk   rl   r   rm   int8ones)	r4   rn   ro   rp   r(   scale_dtyperq   scale_shaperr   s	           r   rk   zAffineQuantizedKVCache.__init__   s     	%wI%wBYKuz(R(R(RSSSYKuz(R(R(RSSSUZ;GGG	
 	
 	
 	UZ;GGG	
 	
 	
 	
 	
r   c                    t          |          \  }}|| j        d d d d |f<   |                    d          | j        d d d d |f<   | j        | j        z  }||d d d d |f<   t          |          \  }}|| j        d d d d |f<   |                    d          | j        d d d d |f<   | j        | j        z  }	||	d d d d |f<   ||	fS )Nr   )r   rf   	unsqueezer   ri   r   )
r4   r   rx   ry   q_k_valk_scalerz   q_v_valv_scaler{   s
             r   r|   zAffineQuantizedKVCache.update   s   @GG(/QQQ9_%.5.?.?.C.C111aaa?+t11!&aaaIo@GG(/QQQ9_%.5.?.?.C.C111aaa?+t11!&aaaIoe|r   c                 ^    |j         j        }|\  }}}}|j         j        } | |||||          S N)rf   r   rh   )rF   kv_cacherq   rn   rp   ro   r(   r   s           r   
from_floatz!AffineQuantizedKVCache.from_float   s>    &,<G9&,s>>7HkRRRr   )
rH   rI   rJ   r   r}   rk   r|   rN   r   r~   r   s   @r   r   r      su         N
 
 
 
 
 
(    S S [S S S S Sr   r   c                        e Zd Zdeddf fdZ	 	 	 	 ddefdZd Zdd	ed
e	e         defdZ
edefd            Z xZS )Transformerr?   returnNc                    t                                                       | _        t          j        j        j                  | _        t          j        fdt          j
                  D                       | _        t          j        j                  | _        t          j        j        j        d          | _        d | _        d | _        d| _        d| _        d S )Nc              3   6   K   | ]}t                    V  d S r   )TransformerBlock)r>   _r?   s     r   	<genexpr>z'Transformer.__init__.<locals>.<genexpr>   s=       $
 $
)*V$$$
 $
 $
 $
 $
 $
r   )epsFbiasr   )rj   rk   r?   nn	Embeddingr    r   tok_embeddings
ModuleListranger"   layersRMSNormr,   normLinearoutput	freqs_cis
mask_cachern   ro   r4   r?   rr   s    `r   rk   zTransformer.__init__   s     l6+<fjIIm $
 $
 $
 $
.3FN.C.C$
 $
 $
 
 
 FJFO<<<	i
F,=EJJJ+/,0  r   Ftrainingc           	         | j         |k    r| j        |k    rd S | j        j        | j        j        z  }t          |d          }|| _         || _        d }t          | j        d          r| j        j        j	        }t          | j        d          r| j        j
        j	        }n&t          | j        d          r| j        j        j	        }|| _        | j        sCt          j        t          j        | j         | j         t          j                            | _        nU||dk    s
J d            t          j        ddd| j         t          j                  | _        d| j        d d d d d d d |f<   |s| j        D ]}	|rt          j        d          5  t+          ||| j        j        ||          |	j        _        d d d            n# 1 swxY w Y   t2                              |	j        j                  |	j        _        t+          ||| j        j        ||          |	j        _        t7          | j        j        | j        j        | j        j        z  | j        j        || j        j        	          | _        d S )
NrT   weightscalesscales_and_zerosrg   r   zLneed to set prompt_length>1 to use non quadratic causal mask in setup_cachesmeta)
use_scaled) ro   rn   r?   r   r#   r   hasattrr   r   rh   r   r   linear_causal_maskr   trilr   rM   causal_maskrm   r   r   rd   r&   	attentionr   r   r   precompute_freqs_cisr   r*   r-   r   )
r4   rn   ro   r   kv_cache_quantizationr   prompt_lengthr(   rh   bs
             r   setup_cacheszTransformer.setup_caches  s    >11#~55F;?dk&88&~q99,,4;)) 	-K&,E4;)) 	7K&,EET["455 	7K06E"4& 	:$z
4.0C5:VVV   D !,1B1B1B^ 2C1BB  %{1a,EJ     D 9:DQQQ111n}n45 	[  ( f--  /6** K5$!0 0,               ,B+L+L,, ,AK(( ,3&&1 , ,AK(( .K"KOt{11K!{2
 
 
s   (G  G	G	c                 >    d| _         d| _        d| _        d| _        dS )zReset caches.

        The caches used by training stage and inference stage may be different, reset them before switching.
        r   N)rn   ro   r   r   )r4   s    r   reset_cacheszTransformer.reset_cachesL  s&    
 ! +/,0r   idxr   c                    | j         
J d            |d}| j         d|j        d                  }n| j        s| j        dd|f         }nt	          |          dk    ry| j        rrt          j        t          j        t	          |          | j        t
          j	        |j
                                                d                              d          }nd| j        ddd|f<   | j        }| j         |         }|                     |          }t          | j                  D ]\  }} |||||          }|                     |          }|                     |          }|S )aZ  Forward pass of the model.

        Args:
            idx  (`torch.LongTensor` of shape `(batch_size, seq_length)`):
                Indices of input sequence tokens in the vocabulary.
            input_pos (`torch.LongTensor` of shape `(batch_size, seq_length)`, *optional*):
                Indices of positions of each input sequence tokens in the position embeddings.
                This argument is optional for training mode but required for
                inference mode(when model.setup_caches(training=False) is used).

        Returns:
            Tensor: The output logits tensor.
        Nz Caches must be initialized firstr   rh   r   r   )r   r   r   r   rD   r   r   r   ro   rM   r   r   r   	enumerater   r   r   )	r4   r   r   maskr   xilayerlogitss	            r   forwardzTransformer.forwardV  sl    ~))+M)))D~1~6II* ('dI(=>I""t'>" J
	NN /"'*#,#3	    Yq\\Yq\\  89 Aq)!34'y1I$$!$+.. 	5 	5HAuaIt44AAIIaLLQr   r8   c                 H     | t                               |                    S r   )r   rG   )rF   r8   s     r   rG   zTransformer.from_name  s     s9&&t,,---r   )FNFNr   )rH   rI   rJ   r   rk   rM   r   r   r   r   r   rN   r<   rG   r~   r   s   @r   r   r      s        !y !T ! ! ! ! ! !( " F
 F
 	F
 F
 F
 F
P1 1 10 06 0hv.> 0& 0 0 0 0d .S . . . [. . . . .r   r   c            
       \     e Zd Zdeddf fdZdedee         dedee         def
d	Z xZS )
r   r?   r   Nc                    t                                                       t          |          | _        t	          |          | _        t          |j        |j                  | _	        t          |j        |j                  | _
        d S r   )rj   rk   	Attentionr   FeedForwardfeed_forwardr   r   r,   ffn_normattention_normr   s     r   rk   zTransformerBlock.__init__  si    "6**'//
FO<<%fj&/BBr   r   r   r   r   c                     ||                      |                     |          |||          z   }||                     |                     |                    z   }|S r   )r   r   r   r   )r4   r   r   r   r   houts          r   r   zTransformerBlock.forward  sW     t22155y$	RRR$##DMM!$4$4555
r   )	rH   rI   rJ   r   rk   r   r   r   r~   r   s   @r   r   r     s        Cy CT C C C C C C		 F#	 		
 v	 
	 	 	 	 	 	 	 	r   r   c                   b     e Zd Zdef fdZd Z	 ddededee         dee         d	ef
d
Z xZ	S )r   r?   c                    t                                                       |j        |j        z  dk    sJ |j        d|j        z  z   |j        z  }t          j        |j        |d          | _        t          j        |j        |j        d          | _	        d | _
        |j        | _        |j        | _        |j        | _        |j        | _        |                     | j                   d S )Nr   r
   Fr   )rj   rk   r   r#   r&   r(   r   r   wqkvwor   "_register_load_state_dict_pre_hook	load_hook)r4   r?   total_head_dimrr   s      r   rk   zAttention.__init__  s    zFM)Q.... -!f.B*BBfoUIfj.uEEE	)FJ
???m#1://?????r   c                     |dz   |v rg|                     |dz             }|                     |dz             }|                     |dz             }t          j        |||g          ||dz   <   d S d S )Nz	wq.weightz	wk.weightz	wv.weightzwqkv.weight)popr   cat)r4   
state_dictprefixargswqwkwvs          r   r   zAttention.load_hook  s    K:-- 455B 455B 455B16BB<1H1HJv-...	 .-r   Nr   r   r   r   r   c                    |j         \  }}}| j        | j        z  }|                     |                              | j        ||gd          \  }	}
}|	                    ||| j        | j                  }	|
                    ||| j        | j                  }
|                    ||| j        | j                  }t          |	|          }	t          |
|          }
t          d |	|
|f          \  }	}
}| j
        | j
                            ||
|          \  }
}|
                    | j        | j        z  d          }
|                    | j        | j        z  d          }|t          j        |	|
||d          }nt          j        |	|
|dd          }|                    dd	                                                              ||| j                  }|                     |          }|S )
Nr   r   c                 .    |                      dd          S )Nr   r
   )	transpose)r   s    r   <lambda>z#Attention.forward.<locals>.<lambda>  s    Aq 1 1 r   r   g        )	attn_mask	dropout_pT)r   	is_causalr
   )r   r&   r(   r   splitr   r   r#   apply_rotary_embmapr   r|   repeat_interleaveFscaled_dot_product_attentionr   
contiguousr   )r4   r   r   r   r   bszseqlenr   kv_sizeqkvys                r   r   zAttention.forward  s    VQ$t}4))A,,$$dh%Ar$JJ1aFF3T];;FF3 2DMBBFF3 2DMBBQ	**Q	**11Aq!9==1a=$=''	1a88DAqt/A AqIIt/A AqII.q!Q$RUVVVAA.q!Q#QUVVVAKK1((**//VTXFFGGAJJr   r   )
rH   rI   rJ   r   rk   r   r   r   r   r~   r   s   @r   r   r     s        @y @ @ @ @ @ @ I I I '+" "" " v	"
 F#" 
" " " " " " " "r   r   c                   8     e Zd Zdeddf fdZdedefdZ xZS )r   r?   r   Nc                 ,   t                                                       t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        d S )NFr   )	rj   rk   r   r   r   r%   w1w3w2r   s     r   rk   zFeedForward.__init__  st    )FJ(@uMMM)FJ(@uMMM)F4fjuMMMr   r   c                     |                      t          j        |                     |                    |                     |          z            S r   )r   r   silur   r   r4   r   s     r   r   zFeedForward.forward  s7    wwqvdggajj))DGGAJJ6777r   )rH   rI   rJ   r   rk   r   r   r~   r   s   @r   r   r     su        Ny NT N N N N N N8 8F 8 8 8 8 8 8 8 8r   r   c                   @     e Zd Zd	dedef fdZd ZdedefdZ xZ	S )
r   r+   r   r   c                     t                                                       || _        t          j        t          j        |                    | _        d S r   )rj   rk   r   r   	Parameterr   r   r   )r4   r   r   rr   s      r   rk   zRMSNorm.__init__  s=    l5:c??33r   c                 p    |t          j        t          j        ||z  dd          | j        z             z  S )Nr   T)r   keepdim)r   rsqrtmeanr   r  s     r   _normzRMSNorm._norm  s3    5;uz!a%RFFFQRRRRr   r   r   c                     |                      |                                                              |          }|| j        z  S r   )r	  rL   type_asr   )r4   r   r   s      r   r   zRMSNorm.forward  s6    AGGII&&..q11##r   )r+   )
rH   rI   rJ   r3   rL   rk   r	  r   r   r~   r   s   @r   r   r     s        4 4C 4e 4 4 4 4 4 4
S S S$ $F $ $ $ $ $ $ $ $r   r   freqsc                    d}d}d}d}||z  }||z  }g }| D ]}dt           j        z  |z  }	|	|k     r|                    |           0|	|k    r|                    ||z             O||k    sJ ||	z  |z
  ||z
  z  }
|                    d|
z
  |z  |z  |
|z  z              t          j        || j        | j                  S )NrT   r   r0   rS   r
   r   )mathpiappendr   tensorrh   r   )r  scale_factorlow_freq_factorhigh_freq_factorold_context_lenlow_freq_wavelenhigh_freq_wavelen	new_freqsfreqwavelensmooths              r   apply_scalingr    s   LOO&8'*::I Q Qdg+$&&&T""""'''TL01111#'88888%//A ?2F a&jD0<?&4-OPPPP<	U\JJJJr   r)   seq_lenn_elembaserh   r   r   c                    d|t          j        d|d          d |dz                                           |z  z  z  }t          j        | |j                  }|rt	          |          }t          j        ||          }t          j        t          j        |          |          }t          j        |j	        |j
        gd          }|                    |          S )Ng      ?r   r
   r   r   r   rg   )r   r   rL   r   r  outerpolar	ones_likestackrealimagr   )	r  r  r  rh   r   r  tr   caches	            r   r   r     s     a++Ov{O<BBDDvMNE 	WU\222A %e$$K5!!EEOE22E::IK8bAAAE88%8   r   r   r   c                     |                                  j        g | j        d d         ddR  }|                    d|                    d          d|                    d          d          }t          j        |d         |d         z  |d         |d         z  z
  |d         |d         z  |d         |d         z  z   gd          }|                    d          }|                    |           S )Nr   r
   r   r1   ).r   ).r   )	rL   reshaper   r   sizer   r$  flattenr  )r   r   xshapedx_out2s       r   r   r   #  s    aggii5"5r51555Gq',,q//1gll1ooqIII[FOi//'&/IfDU2UUFOi//'&/IfDU2UU	
 	 F ^^AF>>!r   )r   )#r  dataclassesr   typingr   r   torch.nnr   r   r   r   torchao.utilsr   r   r   dictrC   rt   Modulerd   torchao.quantization.utilsr   r   r   r   r   r   r   r  r}   r3   rh   rM   r   r   r;   r   r   <module>r6     s    ! ! ! ! ! !                    $ $ $ $ $ $ ' ' ' ' ' '6 6 6 6 *5 *5 *5 *5 *5 *5 *5 *5\ #dUBDG   $r"$
/
/
/4240004240004   424qE   $   $q444423777$	 	 	 D
 
 
 T
 
 
 d
 
 
 D   QT T p #     bi   : M L L L L L*S *S *S *S *SRY *S *S *SZW. W. W. W. W.") W. W. W.t    ry   (: : : : :	 : : :z8 8 8 8 8") 8 8 8$ $ $ $ $bi $ $ $K K K K K8 ! !!! ! ;	!
 ! ! ! ! !& 6 f      r   