
    fPiE                     h    d dl mZ d dlmZ d dlmZ d dlmZ  ee          Z	 G d de          Z
dS )    )	getLogger)Fusion)helper)	OnnxModelc                        e Zd Zdef fdZdedefdZdedz  fdZdedededz  fd	Z	dedededz  fd
Z
dedededz  fdZ xZS )FusionFastGelumodelc                 N    t                                          |dd           d S )NFastGeluTanh)super__init__)selfr	   	__class__s     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_fastgelu.pyr   zFusionFastGelu.__init__   s%    
F33333    input_name_to_nodesoutput_name_to_nodec                     |                      |||          rd S |                     |||          rd S |                     |||          rd S |                     |||          rd S d S )N)fuse_1fuse_2fuse_3fuse_4)r   	tanh_noder   r   s       r   fusezFusionFastGelu.fuse   s    ;;y"57JKK 	F;;y"57JKK 	F;;y"57JKK 	F;;y"57JKK 	F	 	r   returnNc                     |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |d          sdS |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |dd|          }|dS | j                            |d          }|dk     rdS |j        |dk    rdnd         }	| j                            ||dk    rdnd|          }
| j                            |dd|          }|dS | j                            |dd	
          }|dk     rdS | j                            |d|dk    rdnd|          }|dS | j                            |dd||
r|
gng           }|dS | j                            |dd	
          }|dk     rdS | j                            |d|dk    rdnd|          }|dS | j                            |d          sdS |j        d         |	k    rdS ||||||||g}| j        	                    ||j         d         g||          sdS | j
                            |           t          j        d|	g|j         | j                            d                    }d|_        | j                            |           | j        | j        |j        <   dS )aj  
        Fuse Gelu with tanh into one node:
              +---------------------------+
              |                           |
              |                           v
            [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul
              |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)     ^
              |                                                              |
              +------> Mul(B=0.5)--------------------------------------------+
        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   N   Add      ?Mul      ?+ݓ?-C6?deltaexclude,C?Pow      @r   inputsoutputsnamecom.microsoftT)outputlenop_typer	   has_constant_inputmatch_parentfind_constant_inputinput
get_parentis_safe_to_fuse_nodesnodes_to_removeextendr   	make_nodecreate_node_namedomainnodes_to_addappendthis_graph_namenode_name_to_graph_namer/   )r   r   r   r   childrenadd_after_tanhmul_after_tanhmul_halfi
root_input	root_nodemul_before_tanhadd_before_tanhmul_after_powpowsubgraph_nodes
fused_nodes                    r   r   zFusionFastGelu.fuse_1   s    A&999F&y'7':;x==A!!4!=!=F!!z,,^SAA 	F #+>>>F&~'<Q'?@x==A!!4!=!=F!!:**>5$H[\\FJ**8S99q55F^aAAQ7
 J))(aAAQH[\\	*11)UAGZ[["FJ**?F&*QQq55F*11/5qTUvv!![\^qrr"F
//#,4YKK" 0 
 
  FJ**=&*OOq55Fj%%mUaAAQPcdd;Fz,,S#66 	F9Q<:%%F 	
 z//"1%&	
 
 	 F##N333%<"),,Z88	
 
 

 ,
  ,,,8<8L$Z_5tr   c                    |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |d          sdS |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |d          }|dk     rdS |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            ||j        d         |j         d         k    rdnd|          }	| j                            |dd|          }
|
dS | j                            |
dd	
          }|dk     rdS | j                            |
d|dk    rdnd|          }|dS | j                            |dd||	r|	gng           }|dS | j                            |dd	
          }|dk     rdS | j                            |d|dk    rdnd|          }|dS | j                            |d          sdS |j        |j        d         |j         d         k    rdnd         }|j        d         |k    rdS |||||
|||g}| j        	                    ||j         d         g||          sdS | j
                            |           t          j        d|g|j         | j                            d                    }d|_        | j                            |           | j        | j        |j        <   dS )a  
        This pattern is from Tensorflow model.
        Fuse Gelu with tanh into one node:
              +---------------------------+
              |                           |
              |                           v
            [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul(B=0.5)-->Mul-->
              |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)                  ^
              |                                                                           |
              +---------------------------------------------------------------------------+
        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   Nr   r   r    r!   r"   r#   r$   r%   r'   r)   r*   r+   r   r,   r0   T)r1   r2   r3   r	   r4   r6   r8   r7   r5   r9   r:   r;   r   r<   r=   r>   r?   r@   rA   rB   r/   )r   r   r   r   rC   rD   rF   rG   mul_after_mul_halfrI   rJ   rK   rL   rM   rH   rN   rO   s                    r   r   zFusionFastGelu.fuse_2   s    A&999F&y'7':;x==A!!4!=!=F!!z,,^SAA 	F #+>>>F&~'<Q'?@x==A!!4!=!=FA;J**8S99q55F?1%888F&xq'9:x==A!!4!=!=F%a[ J))#)!,0BBBAA
 
	 *11)UAGZ[["FJ**?F&*QQq55F*11/5qTUvv!![\^qrr"F
//#,4YKK" 0 
 
  FJ**=&*OOq55Fj%%mUaAAQPcdd;Fz,,S#66 	F'-3E3KA3NRZRabcRd3d3daajkl
9Q<:%%F 	
 z//&q)*	
 
 	 F##N333%<&-,,Z88	
 
 

 ,
  ,,,8<8L$Z_5tr   c           	         |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |d          sdS |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |dd|          }|dS | j                            |d          }|dk     rdS |j        |dk    rdnd         }	| j                            |dd|          }
|
dS | j                            |
dd|          }|dS | j                            |d          }|dk     rdS | j                            |
dd|          }|dS | j                            |dd	
          }|dk     rdS |j        |dk    rdnd         |	k    rdS | j                            |d|dk    rdnd|          }|dS |j        d         |	k    rd}n|j        d         |	k    rd}ndS | j                            |d||          }|dS | j                            |dd	
          }|dk     rdS |j        |dk    rdnd         |	k    rdS ||||
|||||g	}| j                            ||j         d         g||          sdS | j	        
                    |           t          j        d|	g|j         | j                            d                    }d|_        | j                            |           | j        | j        |j        <   dS )a  
        OpenAI's gelu implementation, also used in Megatron:
           Gelu(x) = x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1.0 + 0.044715 * x * x)))

        Fuse subgraph into a FastGelu node:
            +------------ Mul (B=0.79788456) -------------------+
            |                                                   |
            +-------------------------------+                   |
            |                               |                   |
            |                               v                   v
          [root] --> Mul (B=0.044715) --> Mul --> Add(B=1) --> Mul --> Tanh --> Add(B=1) --> Mul-->
            |                                                                                 ^
            |                                                                                 |
            +-----------> Mul (B=0.5) --------------------------------------------------------+
        r   Nr   r   r    r!   r"   r#   r$   r%   r)   r   r,   r0   T)r1   r2   r3   r	   r4   r5   r6   r7   r9   r:   r;   r   r<   r=   r>   r?   r@   rA   rB   r/   )r   r   r   r   rC   rD   mul_lastrF   rG   rH   rJ   add_1jmul_7978kmul_before_add_1anothermul_0447mrN   rO   s                        r   r   zFusionFastGelu.fuse_3   s     A&999F&y'7':;x==A!!4!=!=F!!z,,^SAA 	F #+>>>F&~'<Q'?@x==A!!4!=!=FA;:**8UDBUVVFJ**8S99q55F^aAAQ7
*11)UAGZ[["F
''FYZZ=FJ**5#66q55F:**?E4I\]]FJ**8V6*JJq55F>qAvv!!1-;;F:225%a1ffRSUhii#F!!$
22GG#A&*44GGF:**+;UGM`aaFJ**8V6*JJq55F>qAvv!!1-;;F 

 z//_Q 	
 
 	 F##N333%<O,,Z88	
 
 

 ,
  ,,,8<8L$Z_5tr   c           	      J   |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |d          sdS |j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }|j         d         |vrdS ||j         d                  }t          |          dk    s|d         j        dk    rdS |d         }| j                            |d          sdS |j        |j        d         |j         d         k    rdnd         }| j                            |dd|          }	|	dS | j                            |	dd	
          }
|
dk     rdS | j                            |	d|
dk    rdnd|          }|dS |j        d         |k    rd}n|j        d         |k    rd}ndS | j                            |d||          }|dS | j                            |dd	
          }|dk     rdS | j                            |d|dk    rdnd|          }|dS |j        d         |k    rd}n|j        d         |k    rd}ndS | j                            |d||          }|dS |j        d         |k    s|j        d         |k    rdS |||||	||||g	}| j                            ||j         d         g||          sdS | j	        
                    |           t          j        d|g|j         | j                            d                    }d|_        | j                            |           | j        | j        |j        <   |                     d           dS )aR  
        PyTorch's gelu implementation with tanh approximation:
           Gelu(x) = 0.5 * x * (1 + torch.tanh(0.7978845834732056 * (x + 0.044714998453855515 * x * x * x)))

        Fuse Gelu with tanh into one node:
              +-----------------+------------------+
              |                 |                  |
              |                 v                  v
            [root] ==> Mul --> Mul --> Mul -----> Add  --> Mul --> Tanh --> Add -----> Mul --> Mul -->
              |                       (A=0.0447)          (A=0.7978)        (A=1)       ^     (A=0.5)
              |                                                                         |
              +-------------------------------------------------------------------------+
        Note that constant input for Add and Mul could be first or second input.
        r   Nr   r   r    r!   r"   r#   g{Gz?r%   r)   r   r,   r0   T)r1   r2   r3   r	   r4   r7   r5   r6   r9   r:   r;   r   r<   r=   r>   r?   r@   rA   rB   r/   increase_counter)r   r   r   r   rC   rD   rE   rF   rH   rJ   rW   rK   rY   rL   r[   	mul_cubedmul_squaredrN   rO   s                      r   r   zFusionFastGelu.fuse_4s  sC    A&999F&y'7':;x==A!!4!=!=F!!z,,^SAA 	F #+>>>F&~'<Q'?@x==A!!4!=!=F!! #+>>>F&~'<Q'?@x==A!!4!=!=FA;z,,Xs;; 	F#)~/CA/F.J_`aJb/b/b!!hij
*11)UAGZ[["FJ**?F$*OOq55F*11/5qTUvv!![\^qrr"F #z11GG"1%33GGF
//Qdee FJ**=&*MMq55FJ++M5qAvv!!STVijj	F?1++GG_Q:--GGFj--iI\]]FQ:--1B11E1S1SF 

 z//_Q 	
 
 	 F##N333%<O,,Z88	
 
 

 ,
  ,,,8<8L$Z_5j)))tr   )__name__
__module____qualname__r   r   dictr   boolr   r   r   r   __classcell__)r   s   @r   r   r      s:       4i 4 4 4 4 4 44 d    jTTX[ j j j jXrT rPT rY]`dYd r r r rhrT rPT rY]`dYd r r r rhyT yPT yY]`dYd y y y y y y y yr   r   N)loggingr   fusion_baser   onnxr   
onnx_modelr   r`   loggerr    r   r   <module>rl      s   
                              	8		^ ^ ^ ^ ^V ^ ^ ^ ^ ^r   