
     `i3T                     z   d Z ddlZddlmZ ddlZddlmZ ddlZddl	m
Z
 ddlmZmZ ddlZddlmZmZ ddlmZmZ  e            rddlZ e            rdd	lmZ dd
lmZ  ej        e          Z	 ddZdee          dej!        de"e ej!        f         de deee          ej!        f         f
dZ#d Z$d Z%d Z&d Z'dS )z!PyTorch - Flax general utilities.    N)UnpicklingError)
from_bytes)flatten_dictunflatten_dict   )is_safetensors_availableis_torch_available)check_torch_load_is_safelogging)	safe_open)	load_fileFc                    |s]t           j                            |          }t                              d|            |                    d          r\i }t          |d          5 }|                                D ]}|                    |          ||<   	 ddd           n# 1 swxY w Y   n	 ddl	}n0# t          t          f$ r t                              d            w xY wt                        |j        |dd	
          }t                              dt          d |                                D                       dd           t#          ||           }	nt%          ||           }	|	S )z(Load pytorch checkpoints in a flax modelzLoading PyTorch weights from .safetensorsflax)	frameworkNr   zLoading a PyTorch model in Flax, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.cpuT)map_locationweights_onlyzPyTorch checkpoint contains c              3   >   K   | ]}|                                 V  d S N)numel).0ts     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/modeling_flax_pytorch_utils.py	<genexpr>z=load_pytorch_checkpoint_in_flax_state_dict.<locals>.<genexpr>K   s*      :e:e17799:e:e:e:e:e:e    ,z parameters.)ospathabspathloggerinfoendswithr   keys
get_tensortorchImportErrorModuleNotFoundErrorerrorr
   loadsumvalues"convert_pytorch_state_dict_to_flax*convert_pytorch_sharded_state_dict_to_flax)

flax_modelpytorch_checkpoint_path
is_shardedallow_missing_keyspt_pathpt_state_dictfkr&   flax_state_dicts
             r   *load_pytorch_checkpoint_in_flax_state_dictr8   0   s   
  j'//"9::=G==>>>N++ 	wM7f555 7 7 7A'(||AM!$$77 7 7 7 7 7 7 7 7 7 7 7 7 7 7!45   %  
  %&&&&EJwUQUVVVMKKus:e:emNbNbNdNd:e:e:e7e7euuuuvvv<]JWW EE\^hiis   (0B%%B),B)2B7 7-C$pt_tuple_key	pt_tensorrandom_flax_state_dictmodel_prefixreturnc                    dt           t                   dt          ffd}| dd         dz   }| d         dv r ||          r||fS | dd         dz   }| d         d	k    r ||           s||fS | dd         d
z   }| d         dk    r ||           s||fS | dd         dz   }| d         dk    r ||          r||fS | dd         dz   }| d         dk    r2|j        dk    r' ||           s|                    dddd          }||fS | dd         dz   }| d         dk    r ||           s|j        }||fS | dd         dz   }| d         dk    r||fS | dd         dz   }| d         dk    r||fS d}| ddd         dk    r| d         dz   }n| ddd         dk    r| d         dz   }|| dd         |fz   }||fS | |fS )zYRename PT weight names to corresponding Flax weight names and reshape tensor if necessarykeyr=   c                 V    t          t                    | f| z   hz            dk    S )zAChecks if `key` of `(prefix,) + key` is in random_flax_state_dictr   )lenset)r?   r<   r;   s    r   is_key_or_prefix_key_in_dictzCrename_key_and_reshape_tensor.<locals>.is_key_or_prefix_key_in_dict\   s1    3-..#7L1MMNNQRRRr   N)scale)weightgamma)meanrunning_mean)varrunning_var)	embeddingrF   )kernel         r   r   rF   rG   )biasbeta)parametrizations	original0_g)rU   	original1_v)tuplestrboolndim	transposeT)r9   r:   r;   r<   rC   renamed_pt_tuple_keynames     ``   r   rename_key_and_reshape_tensorrc   T   s   S%* S S S S S S S S
 (,z9B...3O3OPd3e3e.#Y.. (,y8B>))2N2N|2\2\)#Y.. (,x7B=((1M1Ml1[1[(#Y.. (,~=B8##(D(DEY(Z(Z##Y.. (,{:B8##	!(;(;D`D`amDnDn(;''1a33	#Y.. (,{:B8##,H,H,V,V#K	#Y.. (,{:B7""#Y.. (,y8B6!!#Y.. DBEE???B$&	be!e	 A	A	AB$&+CRC0D7:#Y..""r   c           
      N   t                      oEt          t          t          |                                                     t
          j                  }|rt
          j        nd}d |                                 D             }|rb|                                 D ]M\  }}|j	        |k    r|
                                }|                                                                | |<   N|j        }d|j        v r|j        d         }n|j        }t          |          }	d|j        v r/t          |j        d                   }
|	                    |
           i }||vo|d | D             v }||v o|d | D             v}|                                 D ]\  }}t#          |                    d                    }||         |k    }|d         |k    }|r|r
|d	d          }t'          |||	|          \  }}|f|z   |	v }|r|r|f|z   }||	v r?|j        |	|         j        k    r)t+          d
| d|	|         j         d|j         d          d|j        v rd|d         v s
d|d         v rt-          j        |          |d|z   <   d|d         v r|                    |d            |st-          j        |          nt-          j        |t,          j                  |d|z   <   P|st-          j        |          nt-          j        |t,          j                  ||<   t3          |          S )Nbfloat16c                 $    i | ]\  }}||j         S  dtyper   r6   vs      r   
<dictcomp>z6convert_pytorch_state_dict_to_flax.<locals>.<dictcomp>   s     BBBDAqQBBBr   paramsbatch_statsc                 D    h | ]}|                     d           d         S .r   splitr   r6   s     r   	<setcomp>z5convert_pytorch_state_dict_to_flax.<locals>.<setcomp>   s&    @@@Qa@@@r   c                 D    h | ]}|                     d           d         S rp   rr   rt   s     r   ru   z5convert_pytorch_state_dict_to_flax.<locals>.<setcomp>   &    DDDQWWS\\!_DDDr   rq   r   r   1PyTorch checkpoint seems to be incorrect. Weight  was expected to be of shape 	, but is rH   rD   rJ   rn   num_batches_trackedrh   rm   )r	   
isinstancenextiterr,   r&   Tensorre   itemsri   floatr   numpybase_model_prefixrm   r   updater[   rs   rc   shape
ValueErrorjnpasarraypopr   )r4   r/   from_binre   weight_dtypesr6   rk   r<   flax_model_paramsr;   flax_batch_statsr7   $load_model_with_head_into_base_model$load_base_model_into_model_with_headpt_keyr:   r9   is_bfloat_16has_base_model_prefixflax_keyflax_tensorrequire_base_model_prefixs                         r   r-   r-      s   !##d
4]=Q=Q=S=S8T8T3U3UW\Wc(d(dH!)9u~~zHBBM,?,?,A,ABBBM /!'')) 	/ 	/DAqw(""GGII uuww}}M!/L :$$$&-h7&-)*;<< 
)))'
(9-(HII%%&6777O,8@Q,Q ,@@-@@@@ ) -9<M,M ,DDmDDDD )
 +0022 , ,	V\\#..//$V,8 !-Q< ?/ 	,4I 	,'+L !>)%;\!
 !
+
 &2Oh$>BX$X!/ 	24M 	2$1H--- $:8$D$JJJ ] ] ]-h7=] ]HSHY] ] ]   J---"%%(2,)>)>?B{;?W?W 08 ;<$44##Hd333 1=nK(((#+kadamBnBnBn K(233 1=nK(((#+kadamBnBnBn H%% /***r   c                    dd l i }| D ]}t                        j        |d          }d |                                D             }fd|                                D             }|j        }d|j        v rJ|j        d         }t          |          }|                    t          |j        d                              n|j        }t          |          }||vo|d |D             v }	||v o|d	 |D             v}
|                                D ]\  }}t          |	                    d
                    }||         j
        k    }|d         |k    }|	r|r
|dd          }t          ||||          \  }}|f|z   |v }|
r|r|f|z   }||v r?|j        ||         j        k    r)t          d| d||         j         d|j         d
          d|j        v rd|d         v rt          j        |          |d|z   <   d|d         v rt          j        |          |d|z   <   d|d         v r|                    |d            3|st          j        |          nt          j        |t          j
                  |d|z   <   q|st          j        |          nt          j        |t          j
                  ||<   t#          |          S )Nr   T)r   c                 $    i | ]\  }}||j         S rg   rh   rj   s      r   rl   z>convert_pytorch_sharded_state_dict_to_flax.<locals>.<dictcomp>   s     FFF1AGFFFr   c                     i | ]P\  }}||j         j        k    r|                                n%|                                                                QS rg   )ri   re   r   r   )r   r6   rk   r&   s      r   rl   z>convert_pytorch_sharded_state_dict_to_flax.<locals>.<dictcomp>   sZ     
 
 
QUQRTUAAGu~55qwwyyy17799??;L;L
 
 
r   rn   rm   c                 D    h | ]}|                     d           d         S rp   rr   rt   s     r   ru   z=convert_pytorch_sharded_state_dict_to_flax.<locals>.<setcomp>  rw   r   c                 D    h | ]}|                     d           d         S rp   rr   rt   s     r   ru   z=convert_pytorch_sharded_state_dict_to_flax.<locals>.<setcomp>  s&     H H HQa H H Hr   rq   r   rx   ry   rz   rH   rD   r{   rJ   r|   rh   r}   )r&   r
   r*   r   r   rm   r   r   r[   rs   re   rc   r   r   r   r   r   r   )shard_filenamesr/   r7   
shard_filer4   r   r<   r   r;   r   r   r   r:   r9   r   r   r   r   r   r&   s                      @r   r.   r.      s   LLL O% K K
 """"
:DAAAFF0C0C0E0EFFF
 
 
 
YfYlYlYnYn
 
 
 "3 J--- * 1( ;%12C%D%D"")),z7H7W*X*XYYYY * 1%12C%D%D"0<DU0U 0
DDmDDDD 	- 1=@Q0Q 0
 H H- H H HH 	- "/!4!4!6!6 /	 /	FI c!2!233L(0ENBL %1O|$C!3 08M 0+ABB/ %Bi)?% %!Hk *6((BF\(\%3 68Q 6(?X5111$(>x(H(NNN$aF a a1(;Aa aLWL]a a a   
 111Xb\))CF;{C[C[O$4x$?@HRL((CF;{C[C[O$4x$?@(HRL88#''$777 5ArCK,,,ckR]eheqFrFrFr  h 677 5ArCK,,,ckR]eheqFrFrFr  ))[/	` /***r   c                 &   t           j                            |          }t                              d|            t          t          d| j        j        z             }|	                    d          r!t          |          }t          |d          }nnt          |d          5 }	 t          ||                                          }n!# t          $ r t!          d| d          w xY w	 d	d	d	           n# 1 swxY w Y   t#          | |          S )
(Load flax checkpoints in a PyTorch modelzLoading Flax weights from Flaxr   rq   )seprbzUnable to convert z  to Flax deserializable object. N)r   r   r    r!   r"   getattrtransformers	__class____name__r#   safe_load_filer   openr   readr   OSError"load_flax_weights_in_pytorch_model)modelflax_checkpoint_pathflax_clsr7   state_fs        r   %load_flax_checkpoint_in_pytorch_modelr   L  sl   7??+?@@
KKC-ACCDDD |Veo.F%FGGH $$^44 k()=>>(cBBB&-- 	kk",Xw||~~"F"F" k k ki3Giiijjjk  	k 	k 	k 	k 	k 	k 	k 	k 	k 	k 	k 	k 	k 	k 	k .e_EEEs*   %C8'"C
	C8
C((C88C<?C<c                 
   	 ddl }n0# t          t          f$ r t                              d            w xY wt          t          j                            d |                    	                                }t          |          r;t                              d           t          j                            d |          }t          |          }|                                 }| j        |v o| j        d |D             v}| j        |vo| j        d |D             v }g }t          |                                          }	|                                D ]\  }
}|
d         | j        k    }d	                    | j        f|
z             |v }|r|r|
d
d         }
n|r|r| j        f|
z   }
|
d         dk    rE|j        dk    r:d	                    |
          |vr#|
dd         dz   }
t'          j        |d          }n|
d         dk    r,d	                    |
          |vr|
dd         dz   }
|j        }nG|
d         dv r|
dd         dz   }
n/d|
d         v r|
dd         dz   }
nd|
d         v r|
dd         dz   }
d|v rd	                    |
d
d                   }nd	                    |
          }i }|D ]|}|                    d	          }d}|ddd         ddgk    r|d         dz   }n|ddd         ddgk    r|d         dz   }|(|dd         |gz   }d	                    |          }|||<   }||v r||         }||v r|j        ||         j        k    r)t1          d|
 d||         j         d |j         d	          t3          |t4          j                  st5          j        |          n|} |j        |          ||<   |	                    |           |                    |           |                      |           tC          |	          }	tE          |          dk    rFt                              d!| j#        j$         d"| d#| j#        j$         d$| j#        j$         d%	           n(t                              d&| j#        j$         d'           tE          |	          dk    r,t                              d(| j#        j$         d)|	 d*           n5t                              d+| j#        j$         d,| j#        j$         d-           | S ).r   r   NzLoading a Flax weights in PyTorch, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.c                 ,    | j         t          j        k    S r   )ri   r   re   )xs    r   <lambda>z4load_flax_weights_in_pytorch_model.<locals>.<lambda>p  s    CLAX r   zFound ``bfloat16`` weights in Flax model. Casting all ``bfloat16`` weights to ``float32`` before loading those in PyTorch model.c                 n    | j         t          j        k    r|                     t          j                  n| S r   )ri   r   re   astypenpfloat32r}   s    r   r   z4load_flax_weights_in_pytorch_model.<locals>.<lambda>y  s(    8T8T6==444Z` r   c                 D    h | ]}|                     d           d         S rp   rr   rt   s     r   ru   z5load_flax_weights_in_pytorch_model.<locals>.<setcomp>  s&    *R*R*Rq1773<<?*R*R*Rr   c                 D    h | ]}|                     d           d         S rp   rr   rt   s     r   ru   z5load_flax_weights_in_pytorch_model.<locals>.<setcomp>  s&    &N&N&N1qwws||A&N&N&Nr   rq   r   rD   rM   rN   rQ   )rP   rO   r   r   )rE   rL   rH   )rI   rJ   )rK   rn   rT   rO   rU   rV   rW   rX   rY   rZ   z.Flax checkpoint seems to be incorrect. Weight ry   rz   zQSome weights of the Flax model were not used when initializing the PyTorch model z: z,
- This IS expected if you are initializing z from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).
- This IS NOT expected if you are initializing z from a Flax model that you expect to be exactly identical (e.g. initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).z3All Flax model weights were used when initializing z.
zSome weights of zE were not initialized from the Flax model and are newly initialized: zo
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.zAll the weights of z were initialized from the Flax model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use z* for predictions without further training.)%r&   r'   r(   r!   r)   r   jax	tree_utiltree_mapr,   anywarning
state_dictr   rB   r$   r   joinr^   r   r_   r`   rs   r   r   r~   r   ndarrayr   
from_numpyremoveappendload_state_dictlistrA   r   r   )pt_model
flax_stater&   is_type_bf16r7   pt_model_dictr   r   unexpected_keysmissing_keysflax_key_tupler   r   r   r   special_pt_namesr?   key_componentsrb   key_to_checks                       r   r   r   b  s   ,-   	
 	
 	

 	   6 67X7XZd e effmmooL
< 	
 	5	
 	
 	
 ]++``bl
 

 #:..O''))M,4,F*,T ,"*R*RM*R*R*RR ) -5,Fj,X ,"&N&N&N&N&NN )
 O}))++,,L'6'<'<'>'> A- A-# .q 1X5O O$'HHh.H-J^-[$\$\`m$m! 0 	L4I 	L+ABB/NN1 	L6O 	L&8:^KN "))k.>!.C.CQ_H`H`huHuHu+CRC0;>N-\BBKKB8++0H0HP]0]0]+CRC0;>N%-KKB#999+CRC0;>NN ~b)))+CRC03DDNNnR(((+CRC03CCNJ&&xxqrr 233HHxx//H   
	5 
	5C YYs^^NDbe!e$);[(III%b)D0A&+={*KKK%b)D0!/!4v!="xx7714 .''''1H}$$ M($;$AAA c^ c c&3H&=&Cc cNYN_c c c   >HUWU_=`=`qbj555fq*:%*:;*G*Gh'##H---- ""8,,,,]+++ %%L
?a9"+9 9/>9 9"+9 9 9A8J8S	9 9 9	
 	
 	
 	
 	mXM_Mhmmmnnn
<15x1: 5 5)5 5 5	
 	
 	
 	
 	k("4"= k k#+#5#>k k k	
 	
 	
 Os    -4)F)(__doc__r   pickler   r   	jax.numpyr   r   r   flax.serializationr   flax.traverse_utilr   r   r    r   r	   utilsr
   r   r&   safetensorsr   safetensors.flaxr   r   
get_loggerr   r!   r8   r[   r\   r   dictrc   r-   r.   r   r   rg   r   r   <module>r      s   ( ' 				 " " " " " " 



           ) ) ) ) ) ) ; ; ; ; ; ; ; ;     : : : : : : : : 4 4 4 4 4 4 4 4  LLL =%%%%%%<<<<<< 
	H	%	% IN! ! ! !H@#*@#z@# !ck!12@# 	@#
 5:rz!"@# @# @# @#FT+ T+ T+xQ+ Q+ Q+rF F F,I I I I Ir   