
     `iKT                         d dl mZmZmZmZ d dlZd dlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  ej         e!          Z" G d de          Z#dS )    )AnyCallableOptionalUnionN   )(DiaClassifierFreeGuidanceLogitsProcessor"DiaEOSChannelFilterLogitsProcessor!DiaEOSDelayPatternLogitsProcessorLogitsProcessorListTemperatureLogitsWarper)StoppingCriteriaList)BaseStreamer)GenerateOutputGenerationConfigGenerationMixinGenerationMode)is_deepspeed_zero3_enabled)is_fsdp_managed_module)PreTrainedModel)loggingc                       e Zd ZdZ	 	 	 	 	 	 	 	 d%dedee         deej                 dee	eej
        gee         f                  dee         dee         deeeef                  d	eej
                 d
eej
                 def fdZ	 d&dee         dee         dedeeef         f fdZ	 	 	 d'deej
                 deej
                 deeeej
        f                  deej
        ee         eeej
        f         f         f fdZ	 d&dededeeej
        f         dej
        deej                 deej        eeej
        f         f         fdZ	 	 d( fd	Zedej
        dedeej
                 dej
        fd            Z	 	 	 	 	 	 	 	 	 	 	 	 d)deej
                 dee         dee         dee         dee	eej
        gee         f                  dee         ded         d ed!         d	eej
                 d
eej
                 dee         d"ee         fd#Z ej                    	 	 	 	 	 	 	 	 	 	 	 	 d)deej
                 dee         dee         dee         dee	eej
        gee         f                  dee         ded         d ed!         d	eej
                 d
eej
                 dee         d"ee         deeej        f         fd$            Z xZ S )*DiaGenerationMixinNgeneration_configinput_ids_seq_lengthencoder_input_idsprefix_allowed_tokens_fnlogits_processordevicemodel_kwargsnegative_prompt_idsnegative_prompt_attention_maskreturnc
                    |j         }
|j        }d |_         d |_        t                      }|(|dk    r"|                    t	          |                     |                    t          t          | j        j                  | j        j	                             t                                          |||d |||||		  	        }|
2|
dk    r,t          |
|j                  }|                    d|           |                    t          | j        j        | j        j	        |j        |                     |
|_         ||_        |S )Ng      ?)num_channelseos_token_id	r   r   r   r   r   r   r   r    r!      )guidance_scaleguidance_top_kr   )delay_patternr%   max_generation_lenr   )r(   temperaturer   appendr   r	   lenconfigr*   r%   super_get_logits_processorr   top_kinsertr
   
max_length)selfr   r   r   r   r   r   r   r    r!   original_guidance_scaleoriginal_temperaturecustom_processorsmerged_processorscfg_processor	__class__s                  z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/dia/generation_dia.pyr1   z(DiaGenerationMixin._get_logits_processor,   s    #4"B0<+/((,% 011+0D0K0K$$%<=Q%R%RSSS  . !:;;![5  	
 	
 	
 "GG99/!5/%).% 3+I : 

 

 #.3Ja3O3OD606  M $$Q666  -"k7![5#4#?	  	
 	
 	
 ,C((<%      use_model_defaultskwargsc                      t                      j        ||fi |\  }}|xj        t          | j        j                  z  c_        |j        d uo
|j        dk    | _        ||fS Nr'   )r0   _prepare_generation_configr4   maxr/   r*   r(   	_uses_cfg)r5   r   r>   r?   r   r;   s        r<   rB   z-DiaGenerationMixin._prepare_generation_configo   s     +M%''*L1+
 +
5;+
 +
'< 	$$DK,E(F(FF$$ +9EoJ[JjnoJo ,..r=   inputsbos_token_idc                 .   t                                          |||          \  }}}| j        rat          j        |          }t          j        ||gd          }|                    dd           |d                             dd          |d<   |||fS )N)rE   rF   r   r   dimattention_mask   r'   )r0   _prepare_model_inputsrD   torch
zeros_likecatgetrepeat)r5   rE   rF   r   
input_nameunconditioned_inputsr;   s         r<   rL   z(DiaGenerationMixin._prepare_model_inputs   s     ,177+H+H%% ,I ,
 ,
(
L > 	]#(#3F#;#; Y(<=1EEEF 0$77C1=>N1O1V1VWXZ[1\1\-.z<//r=   
batch_sizemodel_input_namedecoder_start_token_idc                 >   dx}}|d|v r|                     d          }|d|v r|                     d          }||t                              d|du d|du d           | j        j        j        }| j        r|dz  n|}	|%t          j        |	d|f|t          j	        |	          }t          j
        |	|j        d         ft          j	        |
          }|	                                }
|j        d         |dddddf         | j        j        k                        d                                          z
  }|
ddd|f                             dd          	                                }|ddd|f         	                                }||d<   |
|d<   ||fS )zGPrepares `decoder_input_ids` for generation with encoder-decoder modelsNdecoder_input_idsdecoder_attention_maskz[In order to generate with Dia, we need the processed audio input: Got `decoder_input_ids`: z" and got `decoder_attention_mask`=z]. This can be achieved via the [`DiaProcessor`] but now defaulting to non-delayed generation.rK   r'   )dtyper   )sizerZ   r   r   rH   decoder_delay_mask)poploggerwarning_oncer/   decoder_configr$   rD   rM   fulllongonesshapepad_token_idsumrC   	transpose)r5   rT   rU   r   rV   r   rX   rY   r$   real_batch_size
delay_maskvalid_input_sizes               r<   )_prepare_decoder_input_ids_for_generationz<DiaGenerationMixin._prepare_decoder_input_ids_for_generation   s&    6:92#(;|(K(K , 0 01D E E#(@L(P(P%1%5%56N%O%O" $(>(Fp%T1p pUkswUwp p p    ;5BL15OjAooZO ($)J$a68NV[V`io% % %! &+Z%'8'>q'AB%*]c& & &"
 '++--
#A&*;AAAqqq!G*DH`*`)e)ejl)e)m)m)q)q)s)ss 	 'qqq*;+;*;';<FFq!LLQQSS!7;L<L;L8L!M!R!R!T!T 2H-.-7)* ,..r=   c                    | j         r|d         j        d         dz  n|d         j        d         }|                    || j        j        j        d                              dd          } t                      j        |fd|i|}| 	                    || j        j
        |          |d<   |                    dd          r7|d	         d         dk    r%|d         d d dd d f         d d d d d f         |d<   |d                                         |d<   | j         rSd
D ]P}|                    |d           8t          dgdg||         j        dz
  z  z             } ||         j        | ||<   Q|S )Nr   rK   r\   r'   encoder_outputsrX   	use_cacheFcache_position)rX   rY   decoder_position_ids)rD   re   reshaper/   ra   r$   rh   r0   prepare_inputs_for_generationapply_delay_maskrf   rP   
contiguoustuplendimrQ   )
r5   	input_idsrn   r]   r?   rT   model_inputskeyrepeat_patternr;   s
            r<   rs   z0DiaGenerationMixin.prepare_inputs_for_generation   s    :>h_Q'-a0A55_]^M_MefgMh
%%j$+2L2Y[]^^hhijlmnn	 =uww<YrrXgrkqrr -1,A,At{/1C-
 -
()
 K// 	hLAQ4RST4UXY4Y4Y0<=P0QRSRSRSUWYZYZYZRZ0[\]\]\]_cefefef\f0gL,- -99L,M,X,X,Z,Z() > 	R^ R R##C..:%*A3!S8I8NQR8R1S+S%T%TN(@S(9(@.(QL%r=   rx   pad_idrj   c                     || S t          | j        d         |j        d                   }|d d d |d d f         }| d d d |d d f         }t          j        ||k    ||          | d d d |d d f<   | S rA   )minre   rM   where)rx   r|   rj   mask_len
valid_maskvalid_inputs         r<   rt   z#DiaGenerationMixin.apply_delay_mask   s    yq):+;A+>??9H9aaa0
9H9aaa0 &+[v1E{T^%_%_	!!!YhY/"r=   stopping_criteriasynced_gpusassistant_modelr   streamerr   custom_generatec                 6   |                      |||||          } | j        ||fi |\  }}|                    |          }|                     |                                           |                     |||           |4t                      st          |           ot          j	                    dk    }||nt                      }||nt                      }|                    dd           d u}|                     ||j        |          \  }}}|j        d         }|j        }|                     |||           d|vr|                     ||||          }|                     ||||j        |j                  \  }}|j        r)|                     ||                    d                    }|'|                    |                                           |j        d         }|                    d	          d u o|j        d u}|                    d
          d u o|j        d u}|                     ||||||          }|                                 r	d|vrd|d<   |                     |||           |j        dz
  }|j        d         |k    r"|dk    r| j        j        s||j        d         z  }|                      |||||           | !                    ||||||j        ||	|
	  	        }| "                    |||                    d                    }|j#        |d<   |$                    d|j        d                   }|tJ          j&        tJ          j'        fv r.|j(        dk    rtS          d           | j*        |f|||d||S tS          d          )Nr'   rJ   r   )r   rn   )rT   rU   r   rV   r   	tokenizerr\   r4   
min_length)r   has_default_max_lengthhas_default_min_lengthrU   inputs_tensorinput_ids_lengthlogits_to_keepinputs_embedsr&   )r   r   r   ro   z2`num_return_sequences>1` is incompatible with Dia.)r   r   r   zGot incompatible mode for generation, should be one of greedy or sampling. Ensure that beam search is de-activated by setting `num_beams=1`.)+_extract_generation_mode_kwargsrB   get_generation_mode_validate_model_kwargscopy_validate_generation_moder   r   distget_world_sizer   r   rP   rL   rF   re   r   _prepare_special_tokens._prepare_encoder_decoder_kwargs_for_generationrl   _decoder_start_token_tensortoken_healingheal_tokensputcpur4   r   _prepare_generated_length_supports_logits_to_keep_validate_generated_lengthr/   is_encoder_decoder_prepare_cache_for_generationr1   _get_stopping_criteriaro   rr   r   SAMPLEGREEDY_SEARCHnum_return_sequences
ValueError_sample)r5   rE   r   r   r   r   r   r   r   r    r!   r>   r   r?   generation_mode_kwargsr   generation_modekwargs_has_attention_maskr   rU   rT   r   rx   r   r   r   max_cache_lengthprepared_logits_processorprepared_stopping_criterias                                r<   _main_generate_loopz&DiaGenerationMixin._main_generate_loop   s   $ "&!E!E"
 "
 +J$*I1+
 +
5;+
 +
'< ,??PP##L$5$5$7$7888&&8IKabbb 577W;QRV;W;Wv]a]p]r]ruv]vK/?/K++QdQfQf1B1N--ThTjTj %1$4$45Et$L$LTX$X!8<8R8R%2L9
 9
5' #(+
%$$%68QZ`$aaa L00NN|-=?P L
 #'"P"P!-%#4#P ' #Q #
 #
	< * 	]((4J4N4N{4[4[\\ILL))) %?2.!'L!9!9T!A!nFWFbjnFn!'L!9!9T!A!nFWFbjnFn ::/#9#9-'- ; 
 
 ((** 	//?|/S/S-.L)*''(9;KMcddd -7!;"&666 O33K2 4  3A 66**|_jJZ	
 	
 	

 %)$>$>/!1+%=- '% 3+I %? 
%
 
%
! &*%@%@//,00== &A &
 &
" %6$?[! %%b)/"*=>>	 ~4n6RSSS 599 !UVVV  4<!:"<"3	 
 )    T  r=   c                    |                     d          }||                                } | j        d|||||||||	|
||d|}t          |t          j                   }|r|j        }n|}| j        j        j	        }|j
        d         |z  }|                    ||d                              dd          }|                     || j        j        |          }|r||_        n|}|S )NrX   )rE   r   r   r   r   r   r   r   r    r!   r>   r   r   r\   r'   rK    )rP   cloner   
isinstancerM   Tensor	sequencesr/   ra   r$   re   rr   rh   rt   rf   )r5   rE   r   r   r   r   r   r   r   r    r!   r>   r   r?   rj   outputreturn_dict_in_generateoutput_sequencesr$   bszs                       r<   generatezDiaGenerationMixin.generate  s3   $ ZZ 344
!#))++J)) 
/-/%=#+ 3+I1+
 
 
 
  '1&F&F"F" 	&%/% {1>$Q'<7+33CrJJTTUVXYZZ  001A4;C[]ghh" 	&/F%Fr=   )NNNNNNNN)N)NNN)NN)NNNNNNNNNNNN)!__name__
__module____qualname__rD   r   r   intrM   
LongTensorr   r   listr   strdictr   r1   boolrv   rB   rL   r   rl   rs   staticmethodrt   r   r   no_gradr   r   r   __classcell__)r;   s   @r<   r   r   (   sb       I
 /38<W[:> $156:AEA! A!+A! 'smA! $E$45	A!
 #+8S%,4Gc4R+S"TA! ##67A! A! tCH~.A! &el3A! )1(>A! 
A! A! A! A! A! A!H cg/ /!)*:!;/QYZ^Q_/ru/	%	&/ / / / / /$ *./3:>	0 0&0 u|,0 tC$567	0
 
u|Xc]Del1B,CC	D0 0 0 0 0 08 *.0/ 0/0/ 0/ 3,-	0/
 !&0/ &0/ 
uc5<&7!88	90/ 0/ 0/ 0/j 	# # # # # #J EL # 8TYT`Ka fkfr    \ *.8<:><@W[&*7;-16:AE-1)-Y Y&Y $$45Y ##67	Y
 $$89Y #+8S%,4Gc4R+S"TY d^Y ""34Y >*Y &el3Y )1(>Y %TNY "#Y Y Y Yv U]__ *.8<:><@W[&*7;-16:AE-1)-9 9&9 $$459 ##67	9
 $$899 #+8S%,4Gc4R+S"T9 d^9 ""349 >*9 &el39 )1(>9 %TN9 "#9 
~u//	09 9 9 _9 9 9 9 9r=   r   )$typingr   r   r   r   rM   torch.distributeddistributedr   generation.logits_processr   r	   r
   r   r   generation.stopping_criteriar   generation.streamersr   generation.utilsr   r   r   r   integrations.deepspeedr   integrations.fsdpr   modeling_utilsr   utilsr   
get_loggerr   r_   r   r   r=   r<   <module>r      sq    2 1 1 1 1 1 1 1 1 1 1 1                           A @ @ @ @ @ 0 0 0 0 0 0 a a a a a a a a a a a a @ @ @ @ @ @ 7 7 7 7 7 7 - - - - - -       
	H	%	%g g g g g g g g g gr=   