
    Pi5                        d dl Z d dlZd dlZd dlZd dlZd dlZd dl mZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlZd dlmc mc mZ d dlmZ g dZe j        dej        j        d	efd
            ZdKdZd Zd Zd Zd Zdej         dej         d	ej         fdZ!d Z"de#de#d	e#fdZ$dLdZ%d Z&dMdZ' G d dej        j                  Z(dNdZ)d ej*        d	e+fd!Z,d" Z-d# Z.d$ Z/ G d% d&          Z0	 d' Z1d( Z2d) Z3d* Z4dOd+Z5d, Z6d-ed.efd/Z7d-ed.ed	efd0Z8d1 Z9 G d2 d3ej                   Z:d4 Z;d5 Z<d6 Z=d7 Z>d8 Z?d9e#d:e#d	e+fd;Z@d< ZAd= ZBd> ZCd9e#d:e#d	e+fd?ZDdPdAZEdQdCZFdD ZGdEeHdFeHfdGZIdH ZJ G dI dJej        j                  ZKdS )R    Nreduce)versiongcd)AnyCallableOptionalType)return_and_correct_aliasing)benchmark_modelprofiler_runnerget_available_devicesget_compute_capability(benchmark_torch_function_in_microsecondsfind_multiple_register_custom_opget_model_size_in_bytesunwrap_tensor_subclassTorchAOBaseTensoris_cuda_version_at_leastis_MI300is_sm_at_least_89is_sm_at_least_90is_sm_at_least_100is_package_at_leastDummyModulemodulereturnc                 "   d |                                  D             d |                                 D             z  }t          |          dk    sJ d|             t          |          dk    rt          t	          |                    nd}|S )z
    Returns the unique device for a module, or None if no device is found.
    Throws an error if multiple devices are detected.
    c                     h | ]	}|j         
S  device.0ps     a/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/utils.py	<setcomp>z0_assert_and_get_unique_device.<locals>.<setcomp>1   s    555Aqx555    c                     h | ]	}|j         
S r"   r#   r%   s     r(   r)   z0_assert_and_get_unique_device.<locals>.<setcomp>1   s'     9 9 99 9 9r*      zKprepare only works with cpu or single-device CUDA modules, but got devices r   N)
parametersbufferslennextiter)r   devicesr$   s      r(   _assert_and_get_unique_devicer3   +   s     65!2!2!4!4555 9 9 ..**9 9 9 G w<<1	%"	% 	%  %(LL1$4$4T$w--   $FMr*   r"   c                    |i }|=t          | t          j        j                  s
J d            t	          |           j        }|dk    rt          j                                         t          j                            d          }t          j                            d          }|	                                 t          |          D ]F}t          j        j                            d          5   | |i | ddd           n# 1 swxY w Y   G|	                                 t          j                                         |                    |          |z  S |dk    rt          j                                         t          j        j                            d          }t          j        j                            d          }|	                                 t          |          D ]F}t          j        j                            d          5   | |i | ddd           n# 1 swxY w Y   G|	                                 t          j                                         |                    |          |z  S |dk    rt          j                                         t%          j                    }t          |          D ]F}t          j        j                            d          5   | |i | ddd           n# 1 swxY w Y   Gt%          j                    }	t          j                                         |	|z
  |z  }
|
S dS )	z?Benchmark model runs with `args` and `kwargs` both are optionalNzFExpecting `model` to be torch.nn.Module if device_type is not providedcudaT)enable_timingztimed regionmpscpu)
isinstancetorchnnModuler3   typer5   synchronizeEventrecordrangeautogradprofilerrecord_functionelapsed_timer7   eventr8   time)modelnum_runsargskwargsdevice_typestart_event	end_event_
start_timeend_timeaverage_time_per_runs              r(   r   r   =   s   ~%11 	
 	
T	
 	
1 4E::?f
   j&&T&::J$$4$88	 x 	' 	'A(88HH ' 't&v&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 	
   ''	22X==				io++$+??IO)))==	 x 	' 	'A(88HH ' 't&v&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 		''	22X==				Y[[
 x 	' 	'A(88HH ' 't&v&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 9;;	 (: 5A## 
	s6   2	DD	D		H**H.	1H.	,	LL	L	c                    t           j                            t           j        j        j        t           j        j        j        gd          5 } ||i |}d d d            n# 1 swxY w Y   |                    |            |S )NT)
activitiesrecord_shapes)r:   rC   profileProfilerActivityCPUCUDAexport_chrome_trace)pathfnrJ   rK   profresults         r(   r   r   u   s    			N+/N+0
  
  
 
 % 
T$V$$% % % % % % % % % % % % % % % 	T"""Ms   	A!!A%(A%c                  @   dg} t           j                                        r|                     d           n3t           j                                        r|                     d           t           j                                        r|                     d           | S )Nr8   r5   xpur7   )r:   r5   is_availableappendr`   r7   )r2   s    r(   r   r      s    gGz   v				!	! uy uNr*   c                  ~    t           j                                        rt           j                                        S d S N)r:   acceleratorra   current_acceleratorr"   r*   r(   get_current_accelerator_devicerg      s3    %%''  44666tr*   c                      t           j                                        r>t           j                                        } t	          | d          d| d                    S dS )Nr   .r,   g        )r:   r5   ra   get_device_capabilityfloat)
capabilitys    r(   r   r      sT    z   9Z5577

177
1778883r*   output
output_refc                     t          j        t          j        | |z
                      t          j        t          j        |                    z  S rd   )r:   meanabs)rm   rn   s     r(   compute_max_diffrr      sC    :ei 34455
	*9 9  r*   c                     dd l mc m}  | |i |  | |i | |                    d||| d          }|                                }|j        dz  S )Nr   zf(*args, **kwargs))rJ   rK   f)stmtglobalsg    .A)torch.utils.benchmarkutils	benchmarkTimerblocked_autorangerp   )rt   rJ   rK   ry   t0measurements         r(   r   r      s    --------- AtvAtv	!a88 
 
 
B &&((Kc!!r*   nrJ   c                 V    t          d |dz             }| |z  dk    r| S | |z   | |z  z
  S )Nc                 .    | |z  t          | |          z  S rd   r   )xys     r(   <lambda>zfind_multiple.<locals>.<lambda>   s    Q#a))!3 r*   )r,   r   r   )r~   rJ   ks      r(   r   r      s=    33TD[AAA1uzzq5AE?r*   Tc                 6     ddl m rdnd fd}|S )a  This decorator is used to preserve some high level operators for torch.export.export
    while still allow them to be decomposed for inductor path

    requirement: make sure `fn.__name__[1:]` is the operator name you want to register

    NOTE: This should be applied at the top, after all other decorators have been applied
    NOTE: We haven't tested the case when `fn` accepts tensor subclass instance as input,
    e.g. uint4 tensor subclass instance, and we'll probably need to figure out what would make
    sense for downstream system (like executorch) to accept as well

    Example:
        lib = torch.library.Library("my_namespace', "FRAGMENT")

        register_custom_op = _register_custom_op(lib)

        @register_custom_op
        def _the_op_that_needs_to_be_preserved(...)
            ...

        # after this, `_the_op_that_needs_to_be_preserved` will be preserved as
        # torch.ops.my_namespace.the_op_that_needs_to_be_preserved operator after
        # torch.export.export

    r   )register_decompositionCompositeImplicitAutogradCompositeExplicitAutogradc                     ddl m} t           fddD                       rJ d j                      j        }|d         dk    r
|dd          }| | i           z   }                    |                               |            j        }t          t          t          j	        |          |          }r  	|g                      |S )	Nr   )infer_schemac              3   *   K   | ]}|j         v V  d S rd   )__name__)r&   cr\   s     r(   	<genexpr>z9_register_custom_op.<locals>.decorator.<locals>.<genexpr>   s*      77AqBK'777777r*   z.<>zEExpecting op to be defined in normal functions, not lambda or local: rO   r,   )mutates_args)
torch._library.infer_schemar   anyr   defineimplnsgetattrr:   ops)
r\   r   op_nameschemalib_namespaceopdispatch_keyinductor_decomposedlibr   s
   `     r(   	decoratorz&_register_custom_op.<locals>.decorator   s   <<<<<<777777777 	
 	
aTVT_aa	
 	
7 +1:abbkG<<<<<<

6"l+++WUY66@@ 	-(""B4((,,,	r*   )torch._inductor.decompositionr   )r   r   r   r   r   s   `` @@r(   r   r      sc    2 EDDDDD 	)##(        & r*   c                       fd}|S )Nc                 6                         | d          }|S )NMeta)r   )r\   r   r   r   s     r(   r   z$_register_meta_op.<locals>.decorator   s    XXgr6**	r*   r"   )r   r   r   s   `` r(   _register_meta_opr      s*          r*   Fc                 N   fdd}|                                  D ]\  }}t          |t          j        j                  r|sat          j        |                    d          |                    d                    D ]}| |          z  }|t          ||          z  }|S )z
    Returns the model size in bytes. The option to ignore embeddings
    is useful for models with disproportionately large embeddings compared
    to other model parameters that get quantized/sparsified.
    c                     t          | d          r?d}|                                 d         D ] }t          | |          }| |          z  }!|S |                                 |                                 z  S )N__tensor_flatten__r   )hasattrr   r   numelelement_size)tensorsize	attr_name
sub_tensor	flat_sizes       r(   r   z*get_model_size_in_bytes.<locals>.flat_size   s    6/00 		:D $6688; . .	$VY77
		*---K<<>>F$7$7$9$999r*   r   F)recurse)
named_childrenr9   r:   r;   	Embedding	itertoolschainr-   r.   r   )rH   ignore_embeddings
model_sizenamechildr'   r   s         @r(   r   r      s    
: 
: 
: 
: 
: J++-- L Le5%("455 	L:K 	L_   //u1M1M  + + iill*

1%9JKKKJr*   c                       e Zd Zd Zd ZdS )UnwrapTensorSubclassc                 b   t          |          }t          | j                  D ]o\  }}}t          |          }d t	          ||| d                    D             }||d          }|                    ||d d           }|                    |           pt          |          dk    sJ |d         S )Nc                     i | ]\  }}||	S r"   r"   )r&   abs      r(   
<dictcomp>z0UnwrapTensorSubclass.forward.<locals>.<dictcomp>  s    TTTdaQTTTr*   r,   r   )listreversedrebuild_stackr/   zip__tensor_unflatten__rb   )selftensorstodotpmetainner_tensors	nb_tensorrebuilts           r(   forwardzUnwrapTensorSubclass.forward  s    G}}'/0B'C'C 	! 	!#BmM**ITTc-yjkkAR.S.STTTM	

#D--mT4NNGKK    4yyA~~~~Awr*   c                    t          |          t          j        usJ g }g }|g}|r|                                }|                                \  }}|                    t          |          ||f           |D ]t}t          ||          }	t          |	          t          j        u r|                    |	           Ct          |	t          j                  sJ |                    |	           u||| _        |S rd   )	r=   r:   Tensorpopr   rb   r   r9   r   )
r   r   r   plain_tensorsr   objr   metadatar   vals
             r(   right_inversez"UnwrapTensorSubclass.right_inverse!  s   F||5<////x 
	%((**C&)&<&<&>&>#M8  $s))X}!EFFF* % %	c9--99,,!((----%c5<88888KK$$$$  
	% +r*   N)r   
__module____qualname__r   r   r"   r*   r(   r   r     s2        
 
 
    r*   r   c                    |                                  D ]C\  }}t          |t          j        j                  st          |t          j        j                  rt          |d          rt          |j                  t          j	        urt          |j                  t          j        j
        urt          |j        t          j	                  r|t          t          |j                  t          j	                  rPt          |j        t                    r6t          j        |          s"t          j        |dt!                                 t#          |           E| S )a5  Unwraps (nested) tensor subclass in the model to plain tensors
    This is a workaround to make a model with tensor subclass to work with `torch.export.export`
    and `torch.aot_compile`, we hope this can be integrated into compile stack soon
    tracking issue: https://github.com/pytorch/ao/issues/345
    weight)r   r9   r:   r;   Linearr   r   r=   r   r   	Parameter
issubclassr   parametrizeis_parametrizedregister_parametrizationr   r   )rH   	filter_fnr   r   s       r(   r   r   7  s8    ++-- & &e 5%(/22	 eUX%788	
 x((	 U\""%,66U\""%(*<<<5<66 =4--u|<< = 5<):;; =  /66	 = 0x!5!7!7   	u%%%%Lr*   dtypec                 f    t           j        t           j        t           j        t           j        h}| |v S rd   )r:   float8_e4m3fnfloat8_e4m3fnuzfloat8_e5m2float8_e5m2fnuz)r   	fp8_typess     r(   _is_float8_typer   S  s.    	I Ir*   c                     t          t          j        d|                     }t          j        d|           }|r4t	          t
          |                                          \  }}}|rd}|||gS t          d|            )z
    Parse version string representing pre-release with -1

    Examples: "2.5.0.dev20240708+cu121" -> [2, 5, -1], "2.5.0" -> [2, 5, 0]
    z	(git|dev)z(\d+)\.(\d+)\.(\d+)zInvalid version string format: )boolresearchmatchmapintgroups
ValueError)version_stringis_prereleaser   majorminorpatchs         r(   parse_versionr   ]  s     <@@AAMH+^<<E M!#u||~~66ue 	Eue$$K>KKLLLr*   c                  8    t          t          j        d           S )Ngit_version)r   r:   r   r"   r*   r(   	is_fbcoder   o  s    u}m4444r*   c                 v    t                      rdS t          t          j                  t          |           k    S )NT)r   r   r:   __version__)min_versions    r(   torch_version_at_leastr   s  s3    {{ t *++}[/I/IIIr*   c                   (    e Zd ZdZdedefdZd ZdS )_ConfigDeprecationWrapperz
    A deprecation wrapper that directs users from a deprecated "config function"
    (e.g. `int4_weight_only`) to the replacement config class.
    deprecated_name
config_clsc                 "    || _         || _        d S rd   )r  r  )r   r  r  s      r(   __init__z"_ConfigDeprecationWrapper.__init__  s    .$r*   c           	          t          j        d| j         d| j        j         d| j        j         d            | j        |i |S )N`zE` is deprecated and will be removed in a future release. Please use `z/` instead. Example usage:
    quantize_(model, z(...)))warningswarnr  r  r   r   rJ   rK   s      r(   __call__z"_ConfigDeprecationWrapper.__call__  st    E$ E E?3E E$(O$<E E E	
 	
 	

 t////r*   N)r   r   r   __doc__strr   r  r
  r"   r*   r(   r   r   {  sO         
% % % % % %0 0 0 0 0r*   r   c                      t           d          si  _          j        vr
i  j         <   t          t          t          f          sg fd}|S )z7Decorator to implement aten ops for __torch_dispatch__._ATEN_OP_TABLEc                 p    D ]1}t          j        |           | fd            }|j                 |<   2| S )Nc                      || |||          S rd   r"   rt   typesrJ   rK   _funcs        r(   wrapperz/_implements.<locals>.decorator.<locals>.wrapper      uQtV444r*   )	functoolswrapsr  )funcr   r  aten_opsclss      r(   r   z_implements.<locals>.decorator  sZ     	2 	2B_T""6: 5 5 5 #"5 +2Cs#B''r*   )r   r  r9   r   tuple)r  r  r   s   `` r(   _implementsr    s}    3())  
#$$$"$3hu.. :      r*   c                      t           d          si  _          j        vr
i  j         <   t          t          t          f          sg fd}|S )z*Decorator to implement __torch_function__._TORCH_FN_TABLEc                 p    D ]1}t          j        |           | fd            }|j                 |<   2| S )Nc                      || |||          S rd   r"   r  s        r(   r  z>_implements_torch_function.<locals>.decorator.<locals>.wrapper  r  r*   )r  r  r  )r  r\   r  r  	torch_fnss      r(   r   z-_implements_torch_function.<locals>.decorator  sZ     	3 	3B_T""6: 5 5 5 #"5 ,3C$R((r*   )r   r  r9   r   r  )r  r!  r   s   `` r(   _implements_torch_functionr"    s}    3)** ! 
#%%%#%C i$//  K	      r*   c                 R   | j         }| j        }t          j        j        } |t          j        j        j        j                  d             } |t          j                  d             } |t          j        j	        g          d             } ||j
        j        |j        j        |j        j        |j	        j        g          d             }dt          dt          dt          fd ||j        j                  fd	            } ||j        j                  d
             }d S )Nc                 |    t          |          dk    sJ |d         }|                                D ]}|dv sJ 	d|v r|d         |j        k    sJ d|v r|d         |j        k    sJ |                    d          |j        k    r|S |d                              fd          }t           |||          S )Nr,   r   )r   layoutr$   r   r%  r$   c                      |           S )Nr#   r"   )r   r$   r  s    r(   r   z:_implements_common_tensor_ops.<locals>._.<locals>.<lambda>  s    a9O9O9O r*   )r/   keysr   r%  getr$   _apply_fn_to_datar   )r  r  rJ   rK   r   r   
new_tensorr$   s   `      @r(   rO   z(_implements_common_tensor_ops.<locals>._  s     4yyA~~~~Aw 	6 	6A555555 f'?dj0000v(#t{2222H%%T[  K!W../O/O/O/O/OPP
*4vzJJJr*   c                     t          |          dk    sJ t          |d                   t          |d                   k    o|d         j        |d         j        k    S )N   r   r,   )r/   r=   shaper  r  rJ   rK   s       r(   rO   z(_implements_common_tensor_ops.<locals>._  sJ    4yyA~~~~DG}}T!W-P$q'-47=2PPr*   c                 H     d                               fd          S )Nr   c                 ,     | gdd          R i S Nr,   r"   r   rJ   r  rK   s    r(   r   z:_implements_common_tensor_ops.<locals>._.<locals>.<lambda>  s)    443ODH3O3O3O3O3O r*   )r)  r.  s   ` ``r(   rO   z(_implements_common_tensor_ops.<locals>._  s/     Aw(()O)O)O)O)O)OPPPr*   c           
      h     t           d                              fd                    S )Nr   c                 ,     | gdd          R i S r1  r"   r2  s    r(   r   z:_implements_common_tensor_ops.<locals>._.<locals>.<lambda>  s)    Q0Labb0L0L0LV0L0L r*   )r   r)  r.  s   ` ``r(   rO   z(_implements_common_tensor_ops.<locals>._  sG     +G%%&L&L&L&L&L&LMM	
 
 	
r*   r   srcr   c                     t           fd j        D                       }d}t           d          r!t           fd j        D                       }t           fd j        D                       }d}t           d          r!t           fd j        D                       }t                     t                    k    o j        j        k    o|o|o|o|S )Nc              3   p   K   | ]0}t          |          j        t          |          j        k    V  1d S rd   r   r-  r&   t_namer   r5  s     r(   r   zH_implements_common_tensor_ops.<locals>._same_metadata.<locals>.<genexpr>  sV       "
 "
 D&!!'73+?+?+EE"
 "
 "
 "
 "
 "
r*   Toptional_tensor_data_namesc              3      K   | ]R}t          |          ,t          |          j        t          |          j        k    nt          |          d u V  Sd S rd   r8  r9  s     r(   r   zH_implements_common_tensor_ops.<locals>._same_metadata.<locals>.<genexpr>  s       / /  tV,,8 D&))/733G3G3MMM f--5	/ / / / / /r*   c              3   \   K   | ]&}t          |          t          |          k    V  'd S rd   r   r&   a_namer   r5  s     r(   r   zH_implements_common_tensor_ops.<locals>._same_metadata.<locals>.<genexpr>  sP       
 
 D&!!WS&%9%99
 
 
 
 
 
r*   optional_tensor_attribute_namesc              3   \   K   | ]&}t          |          t          |          k    V  'd S rd   r>  r?  s     r(   r   zH_implements_common_tensor_ops.<locals>._same_metadata.<locals>.<genexpr>  sP       ' ' f%%f)=)==' ' ' ' ' 'r*   )alltensor_data_namesr   r;  tensor_attribute_namesrA  r=   r-  )r   r5  _tensor_shape_match_optional_tensor_shape_match_attr_match_optional_attr_matchs   ``    r(   _same_metadataz5_implements_common_tensor_ops.<locals>._same_metadata  s   ! "
 "
 "
 "
 "
0"
 "
 "
 
 
 (,$4566 		+. / / / / / #=/ / / , ,(  
 
 
 
 
5
 
 
 
 

  $4:;; 	#& ' ' ' ' '"B' ' ' $ $  JJ$s))# %
ci'%#% -% 	%
 %	
r*   c                     |d         }|d         } ||          rR|                                 d         }|D ]3}t          ||                              t          ||                     4d S t          d|d         |d         f           )Nr   r,   z7Not supported args for copy_ due to metadata mismatch: )r   r   copy_r   )	r  r  rJ   rK   r   r5  self_tensorstensor_namerJ  s	           r(   rO   z(_implements_common_tensor_ops.<locals>._!  s    Aw1g>$$$ 	2244Q7L+ L Lk**00k1J1JKKKKFXd1gtTUwEWXX
 
 	
r*   c                    |d         t          d          rt          d          r	 j        |dd          i |}|                    d          fdj        D             }g }t          d          rZj        D ]R}t          |          }|)|                    |                                         =|                    d            Sfdj        D             }g }	t          d	          rfd
j	        D             }	 j
        g ||||	R  }
t          | |||
          S t          d          )Nr   rD  rE  r,   r$   c                 V    g | ]%}t          |                                        &S r"   )r   to)r&   r   r$   r   s     r(   
<listcomp>z<_implements_common_tensor_ops.<locals>._.<locals>.<listcomp>6  s>       37d##&&v..  r*   r;  c                 @    g | ]}|d k    rt          |          nS r#   r>  r&   r   r$   r   s     r(   rR  z<_implements_common_tensor_ops.<locals>._.<locals>.<listcomp>C  sB     ! ! ! -6,A,Ai(((v! ! !r*   rA  c                 @    g | ]}|d k    rt          |          nS r#   r>  rT  s     r(   rR  z<_implements_common_tensor_ops.<locals>._.<locals>.<listcomp>I  sB     . . .! 1:X0E0EGD),,,6. . .r*   zSubclasses must implement `aten._to_copy.default` or specify `tensor_data_names` and `tensor_attribute_names` for tensor class or tensor instance before using it)r   _get_to_kwargsr   rD  r;  r   rb   rQ  rE  rA  	__class__r   NotImplementedError)r  r  rJ   rK   r   optional_tensorstensor_data_namemaybe_tensortensor_attributesoptional_tensor_attributestr$   r   s              @@r(   rO   z(_implements_common_tensor_ops.<locals>._.  s   Aw4,-- #	F'*3
 3
 #	F )T($qrr(=f==FZZ))F    ;?;Q  G  "t9:: 6(,(G 6 6$#*41A#B#BL#/(//0G0GHHHH(//5555! ! ! ! !!%!<! ! ! *,&t>?? . . . . .%)%I. . .*
  " " ,	  A /tT61EEE! p
 
 	
r*   )
implementsimplements_torch_functionr:   r   atenrQ  dtype_layout!_has_compatible_shallow_copy_typer   
contiguousdetachdefaultclonealiasr   r   rL  _to_copy)r  r_  r`  ra  rO   rJ  s        @r(   _implements_common_tensor_opsrj    s   J # =9>DZ	!.//K K 0/K0 uFGGQ Q HGQ L#	
 
Q Q 
Q ZKJJO#		
 
 
 
$
. $
5F $
4 $
 $
 $
 $
L Z
"##

 

 

 

 $#

 Z%&&)
 )
 '&)
 )
 )
r*   c                    t          | d          rt          | d          sJ t          j                            | |           t	          | dg           D ],}|| j        vr!t          | |          st          | |d            -t	          | dg           D ],}|| j        vr!t          | |          st          | |d            -d S )NrD  rE  r;  rA  )r   r:   _utils_set_obj_stater   __dict__setattr)r   stateoptional_tensor_data_nameoptional_tensor_attribute_names       r(    _torchao_base_tensor__setstate__rs  [  s   4,-- '&3 3    
Le,,,%,T3OQS%T%T ; ;!$DM99'+C
 C
9 D3T:::*1/+ + @ @& *>>w0H
 H
> D8$???@ @r*   c                    |i n|}t          | d          r7| | j        v r.|| j        |          v r | j        |          |         ||||          S t          j                                        5   ||i |cddd           S # 1 swxY w Y   dS )a  Use this util function for a common `__torch_function__` implementation
    that dispatches to ops/functions registered with `_implements`

    class MyTensor(torch.Tensor):
        ...
        __torch_function__ = classmethod(_dispatch__torch_function__)
    Nr  )r   r  r:   _CDisableTorchFunctionSubclass)r  r  r  rJ   rK   s        r(   _dispatch__torch_function__rw  o  s     >RRvF&''I3&&&C',,,-s"3'-dE4HHH		.	.	0	0 % %tT$V$$% % % % % % % % % % % % % % % % % %s   ,BBBc                 B   t          | d          r7| | j        v r.|| j        |          v r | j        |          |         ||||          S t          d |D                       }d |                                D             }t	          | j         d|d|d|d|	          )a  Use this util function for a common `__torch_dispatch__` implementation
    that dispatches to ops/functions registered with `_implements`

    class MyTensor(torch.Tensor):
        ...
        __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
    r  c              3   4   K   | ]}t          |          V  d S rd   r=   r&   args     r(   r   z._dispatch__torch_dispatch__.<locals>.<genexpr>  s(      00Cd3ii000000r*   c                 4    i | ]\  }}|t          |          S r"   rz  )r&   r   r|  s      r(   r   z/_dispatch__torch_dispatch__.<locals>.<dictcomp>  s$    ===FAs1d3ii===r*   zC dispatch: attempting to run unimplemented operator/function: func=z, types=z, arg_types=z, kwarg_types=)r   r  r  itemsrX  r   )r  r  r  rJ   rK   	arg_typeskwarg_typess          r(   _dispatch__torch_dispatch__r    s     	%&&H3%%%C&s+++,s!#&t,T5$GGG00400000I==fllnn===K
<  	G  	GW[  	G  	G`e  	G  	Gjs  	G  	G  yD  	G  	G  r*   tensor_classlayout_classc                 D     t           d          si  _         fd}|S )a~  Helper function for layout registrations, this is used to implement
    register_layout decorator for each tensor subclass, see aqt.py for example usage

    Args:
        tensor_class: Tensor subclass type
        layout_class: the class type of subclass of `Layout`, e.g. `PlainLayout`

    Returns:
        a decorator that registers the tensor impl constructor in the table
    _LAYOUT_CONSTRUCTOR_TABLEc                 h    | j         j        <   t          j                            | g           | S rd   )
from_plainr  r:   serializationadd_safe_globals)tensor_impl_classr  r  s    r(   r   z#_register_layout.<locals>.decorator  s;    ( 	.|< 	,,l<M-NOOO  r*   )r   r  )r  r  r   s   `` r(   _register_layoutr    sG     <!<== 413.! ! ! ! ! ! r*   c                     t          | d          st          d|            || j        vrt          d| d|            | j        |         S )a  Get TensorImpl class constructor (TensorImplClass.from_plain) for `tensor_class` based on `layout_class`
    `layout_class` means the class type of subclass of `Layout`, e.g. `PlainLayout`

    Args:
        tensor_class: Tensor subclass type
        layout_class: the class type of subclass of `Layout`, e.g. `PlainLayout`

    Returns:
        tensor impl subclass constructor for the layout_class
    r  z1no registered tensor_impl class constructor for: zlayout_name: z is not supported yet for )r   r   r  )r  r  s     r(   _get_tensor_impl_constructorr    sy     <!<== 
NNN
 
 	
 <AAARLRRLRR
 
 	
 1,??r*   c                     t          d |D                       }d|v r|                    d           t          j        j        j        |i |\  }}}}|| j        n|}|| j        n|}||d}|S )Nc              3   N   K   | ] }t          |t          j                  |V  !d S rd   )r9   r:   r%  r{  s     r(   r   z!_get_to_kwargs.<locals>.<genexpr>  s3      JJJsEL,I,IJJJJJJJr*   r%  )r$   r   )r  r   r:   ru  _nn	_parse_tor$   r   )r   rJ   rK   r$   r   rO   s         r(   rV  rV    s    JJJJJJJD6

8
  (,0$A&AAFE1a"NT[[F-DJJUE F Mr*   c                       e Zd ZdZed             Z ee          Z ee          Z	 ee
          Z
 ee          Z ee          Z ee          Z ee          ZeZd Zd Zed             Zd Zd Zd Zd	S )
r   a  A util tensor subclass that provides commonly used functions
       new tensor subclass can inherit it to get all the utility functions

       class MyTensor(TorchAOBaseTensor):
           pass

    This includes:
       `_get_to_kwargs` that can get the kwargs for `to`
            class MyTensor(TorchAOBaseTensor):
                def to(self, *args, **kwargs):
                    kwargs = _get_to_kwargs(*args, **kwargs)
                    ...
        `implements`:
            implements = MyTensor.implements

            @implements(torch.nn.functional.linear):
            def _(func, types, args, kwargs):
                ...

        `register_layout`:
            register_layout = MyTensor.register_layout

            @register_layout(PlainLayout)
            class PlainAQTTensorImpl(...):
                ...

         `get_tensor_impl_constructor`:
            get_tensor_impl_constructor = MyTensor.get_tensor_impl_constructor
            # in constructor of MyTensor:
            tensor_impl_ctr = get_tensor_impl_constructor(type(_layout))
            tensor_impl = tensor_impl_ctr(data, scale, zero_point, _layout)

    class variables to define to simplify implmentation of tensor subclasses:
       `tensor_data_names` (List[str]): list of names of all requires tensor_data, order should match
          the `__init__` list of tensor subclass
       `tensor_attribute_names` (List[str]): list of names of non-Tensor attributes,
            order should match the `__init__` list of tensor subclass, following all the `tensor_data_names` arguments
       `optional_tensor_data_names` (List[str]): it's optional to define this field to have the additional boilerplate functions been implemented for you, but this will be need if there are some optional Tensor data attributes, when defined, this will be a list of names of Tensors that can be optional
       `optional_tensor_attribute_names` (List[str]): it's optional to define this field to have the additional boilerplate functions been implemented for you, but this will be need if there are some optional non-Tensor attributes, when defined, this will be a list of names of attributes that can be optional
       Note: Argument order in __init__ and __new__ should match exaclty with tensor_data_names + tensor_attribute_names + optional_tensor_data_names (if present) + optional_tensor_attribute_names (if present)


    If `tensor_data_names` (torch.Tensor data attribute names) and `tensor_attribute_names` (non-torch.Tensor attribute names) are defined, there are some additional
    functions that will be added, this includes:
    `__tensor_flatten__`: flattens a subclassed tensor instance, returns a tuple, first element is tensor data names for valid tensor data,
        second element is a dict from attribute_name to non-Tensor attributes
    `__tensor_unflatten__`: takes a tensor_data_dict (a map from tensor name to Tensor), and list of non-tensor attributes, returns a new instance of the subclassed tensor
    `_apply_fn_to_data`: takes a function (Tensor -> Tensor),  applies function to all tensor data and
        recreate a new subclassed Tensor with the transformed tensor data
    `__repr__`: the string representation of the subclassed tensor instance
    `_same_metadata`: returns whether the metadata is the same between two instances of cls
    `__setstate__`: when loading a serialized tensor subclass checkpoints, it sets the new
    optional tensor and tensor attribute that is saved in the old checkpoint to None,
    to maintain BC of old checkpoints when we add new optional tensor data or attributes to
    the tensor subclass
    torch ops: torch.Tensor.contiguous
    aten ops: aten.detach.default, aten.clone.default, aten.alias,default, aten.contiguous.default, aten.copy_.default, aten._to_copy.default (enables t.to)

    Example:
        class MyTensor(torch.Tensor):
            tensor_data_names = ["a", "b"]
            tensor_attribute_names = ["c", "d"]
            optional_tensor_data_names = ["e", "f"]
            optional_tensor_attribute_names = ["g", "h"]


            def __new__(
                cls,
                a: Tensor,
                b: Tensor,
                c: int,
                d: str,
                e: Optional[Tensor] = None,
                f: Optional[Tensor] = None,
                g: Optional[int] = None,
                h: Optional[int] = None,
            ):
                pass

            def __init__(
                self,
                a: Tensor,
                b: Tensor,
                c: int,
                d: str
                e: Optional[Tensor] = None,
                f: Optional[Tensor] = None,
                g: Optional[int] = None,
                h: Optional[int] = None,
            ):
                pass

    c                 R   t          | d          si | _        t          | d          si | _        | | j        vr
i | j        | <   | | j        vr
i | j        | <   t          | d          r0t          | d          r |                                  t          | _        | j        D ]}t          | d          r4|| j        v r+| j        |                              | j        |                    t          | d          r4|| j        v r+| j        |                              | j        |                    d S )Nr  r  rD  rE  )r   r  r  rj  rs  __setstate__	__bases__update)r  rK   parents      r(   __init_subclass__z#TorchAOBaseTensor.__init_subclass__@  s]   s,-- 	$!#Cs-.. 	%"$Cc(((&(Cs#c)))')C$ 3+,, 	@>V1W1W 	@--///?C m 	M 	MFs,-- K&C<N2N2N"3'..s/A&/IJJJs-.. M6S=P3P3P#C(//0CF0KLLL		M 	Mr*   c                 x    t           j                            t          t	          |                                d S rd   )r:   ru  _log_api_usage_oncer  r=   r	  s      r(   r  zTorchAOBaseTensor.__init__b  s*    $$Sd__55555r*   c                     t           d          rt           d          r j                                        }t           d          r1 j        D ])}t	           |          }||                    |           * fd j        D             }t           d          r| fd j        D             z  }||fS t          d          )NrD  rE  r;  c                 2    i | ]}|t          |          S r"   r>  r&   attrr   s     r(   r   z8TorchAOBaseTensor.__tensor_flatten__.<locals>.<dictcomp>p  s2       .2gdD))  r*   rA  c                 2    i | ]}|t          |          S r"   r>  r  s     r(   r   z8TorchAOBaseTensor.__tensor_flatten__.<locals>.<dictcomp>t  s5     ) ) ) '$--) ) )r*   zSubclasses should implement __tensor_flatten__ or specify `tensor_data_names` and `tensor_attribute_names` for tensor class before using it)	r   rD  copyr;  r   rb   rE  rA  rX  )r   rD  rZ  r[  	attr_dicts   `    r(   r   z$TorchAOBaseTensor.__tensor_flatten__e  s1   4,-- 	0'*3
 3
 	0 !% 6 ; ; = =t9:: C(,(G C C$#*41A#B#BL#/)001ABBB   6:6Q  I t>?? % ) ) ) ) $ D) ) ) 	
 %i//! Z
 
 	
r*   c                 b   t          | d          rt          | d          rfd| j        D             }i }t          | d          rfd| j        D             }fd| j        D             }i }t          | d          rfd| j        D             } | g ||R i ||S t          d	          )
NrD  rE  c                      g | ]
}|         S r"   r"   )r&   r   tensor_data_dicts     r(   rR  z:TorchAOBaseTensor.__tensor_unflatten__.<locals>.<listcomp>  s,          +/ &     r*   r;  c                 >    i | ]}|                     |d           S rd   )r(  )r&   rZ  r  s     r(   r   z:TorchAOBaseTensor.__tensor_unflatten__.<locals>.<dictcomp>  s=     ( ( (( %&6&:&:;KT&R&R( ( (r*   c                      g | ]
}|         S r"   r"   r&   r   r\  s     r(   rR  z:TorchAOBaseTensor.__tensor_unflatten__.<locals>.<listcomp>  s,     # # #,0!$'# # #r*   rA  c                 "    i | ]}||         S r"   r"   r  s     r(   r   z:TorchAOBaseTensor.__tensor_unflatten__.<locals>.<dictcomp>  s1     + + + +D1+ + +r*   zSubclasses should implement __tensor_unflatten__ or specify `tensor_data_names` and `tensor_attribute_names` for tensor class before using it)r   rD  r;  rE  rA  rX  )	r  r  r\  
outer_sizeouter_striderequired_tensorsoptional_tensor_dictrequired_attributesoptional_attribute_dicts	    ``      r(   r   z&TorchAOBaseTensor.__tensor_unflatten__  so    3+,, 	>V1W1W 	       363H      $& s899 ( ( ( (,/,J( ( ($
# # # #474N# # # ')#s=>> + + + + # C+ + +'
 3 !$   ' *	   " \
 
 	
r*   c                     t           d          rt           d          r fd j        D             }i }t           d          r0 j        D ](}t           |          }| |          ||<   #d ||<   ) fd j        D             }i }t           d          r fd j        D             }  j        g ||R i ||S t          d          )	NrD  rE  c                 B    g | ]} t          |                    S r"   r>  )r&   r  r\   r   s     r(   rR  z7TorchAOBaseTensor._apply_fn_to_data.<locals>.<listcomp>  s:          ,074&&''     r*   r;  c                 0    g | ]}t          |          S r"   r>  r  s     r(   rR  z7TorchAOBaseTensor._apply_fn_to_data.<locals>.<listcomp>  s0     # # #(,d### # #r*   rA  c                 2    i | ]}|t          |          S r"   r>  )r&   r   r   s     r(   r   z7TorchAOBaseTensor._apply_fn_to_data.<locals>.<dictcomp>  s5     + + +! wtY77+ + +r*   zSubclasses should implement _apply_fn_to_data or specify `tensor_data_names` and `tensor_attribute_names` for tensor class or tensor instance before using it)r   rD  r;  r   rE  rA  rW  rX  )r   r\   r  r  rZ  r[  r  r  s   ``      r(   r)  z#TorchAOBaseTensor._apply_fn_to_data  s   4,-- 	'*3
 3
 	         484J      $& t9:: F(,(G F F$#*41A#B#BL#/ACLAQAQ,-=>>AE,-=>># # # #040K# # # ')#t>?? + + + +%)%I+ + +'
 "4> !$   ' *	   " l
 
 	
r*   c           	      N   t          | d          rt          | d          rd}|| j        d          dt          | | j        d                    z  }| j        dd          D ]}|d| dt          | |           z  }| j        D ]}|d| dt          | |           z  }t          | d          r#| j        D ]}|d| dt          | |           z  }t          | d	          r#| j        D ]}|d| dt          | |           z  }| j        j         d
| dS t          d          )NrD  rE   r   =r,   z, r;  rA  ()zSubclasses must implement __repr__ or specify `tensor_data_names` and `tensor_attribute_names` for tensor class or tensor instance before using it)	r   rD  r   rE  r;  rA  rW  r   rX  )r   repr_strrZ  tensor_attribute_names       r(   __repr__zTorchAOBaseTensor.__repr__  s   4,-- 	<'*3
 3
 	< H41!4aawtTE[\]E^7_7_aaaH$($:122$> V V U!1UUGDBR4S4SUUU *.)D  %W.WW?T1U1UWW
 t9:: (,(G  $Q-QQ>N0O0OQQHH
 t>?? d-1-Q d d) c%: c cWTK`=a=a c ccHHn-;;;;;;! a
 
 	
r*   c                 4    t          | d          sd S | j        S )N_layout)r   r  )r   s    r(   
get_layoutzTorchAOBaseTensor.get_layout  s!    tY'' 	4|r*   N)r   r   r   r  classmethodr  r  r_  r"  r`  rj  r  __torch_dispatch__rw  __torch_function__r  register_layoutr  get_tensor_impl_constructorrV  r  r   r   r)  r  r  r"   r*   r(   r   r     s#       \ \| M M [M0 [))J +,F G G$/K0M$N$N!$%@AA$%@AA!k"233O"-+.J"K"K#N6 6 6
 
 
4 !
 !
 [!
F#
 #
 #
J 
  
  
D    r*   r   c                 $   |t          |          z
  t          |           k    rt          d          t          |           }t          t          |           |          D ]0}|                    |||z
  t          |          z                       1|S )a  
    __torch_dispatch__ doesn't guarantee the number of arguments you are
    passed (e.g., defaulted arguments are not passed); but usually it is
    convenient to pad out the arguments list with defaults.  This function
    helps you do that.
    Args:
        args: the list of positional arguments passed to __torch_dispatch__
        n: the number of arguments you are expecting to get
        defaults_tail: default values for the arguments, starting from the
            end of the list
    Example:
        >>> fill_defaults([1, 2, 3], 5, [3, 4, 5])
        [1, 2, 3, 4, 5]
        >>> fill_defaults([1, 2, 3], 5, [None, None, None])
        [1, 2, 3, None, None]]
    z%not enough defaults to fill arguments)r/   RuntimeErrorr   rA   rb   )rJ   r~   defaults_tailris        r(   fill_defaultsr    s    " 	3}D		))BCCCT

A3t99a   < <	q1us='9'99:;;;;Hr*   c                  `    t           j                                        ot           j        j        S rd   )r:   r5   ra   r   hipr"   r*   r(   is_ROCMr    s     :""$$:)::r*   c                      t                      r4g d} t          j                            d          j        }| D ]	}||v r dS 
dS )N)gfx940gfx941gfx942r   TFr  r:   r5   get_device_propertiesgcnArchName)
mxArchNamearchNamearchs      r(   r   r     s\    yy 333
:33A66B 	 	Dxtt  5r*   c                  v    t                      r*t          j                            d          j        } d| v rdS dS )Nr   gfx950TFr  r  s    r(   is_MI350r     s;    yy :33A66Bx45r*   c                  p    t                      r't          j                            d          j        } 	 dS dS )Nr   gfx1200gfx1201TFr  r  s    r(   is_Navi4r  (  s5    yy :33A66B	45r*   r   r   c                     t           j                                        ot           j        j        }|r$t           j                                        | |fk    ndS )z0Check if the CUDA version is exactly major.minorFr:   r5   ra   r   rj   )r   r   is_cudas      r(   is_sm_versionr  0  sG    j%%''>EM,>GCJU5:++--%??PUUr*   c                      t           j                                        o2t           j        j        o!t           j                                        dk    S )N)   	   r  r"   r*   r(   r   r   6  @    
!! 	9M	9J,,..&8r*   c                      t           j                                        o2t           j        j        o!t           j                                        dk    S )N)r  r   r  r"   r*   r(   r   r   >  r  r*   c                      t           j                                        o2t           j        j        o!t           j                                        dk    S )N)
   r   r  r"   r*   r(   r   r   G  s@    
!! 	:M	:J,,..'9r*   c                     t           j                                        sdS t           j        j        }|dS t	          t
          |                    d          d d                   \  }}||f| |fk    S )NFri   r,  )r:   r5   ra   r   r   r   split)r   r   cuda_version
cuda_major
cuda_minors        r(   r   r   O  sq    :""$$ u=%Lu l&8&8&=&=bqb&ABBJ

#u~55r*   2.6.0c                 n    t          | t          j                  r| j        } | dk    ot	          |          S )Nr8   r9   r:   r$   r=   r   r$   r   s     r(   check_cpu_versionr  Y  5    &%,'' U?>5g>>>r*   2.8.0c                 n    t          | t          j                  r| j        } | dk    ot	          |          S )Nr`   r  r  s     r(   check_xpu_versionr  _  r  r*   c                     | |z   dz
  |z  S r1  r"   )r   r   s     r(   ceil_divr  e  s    EAI!r*   package_namer   c                 r    t           j                            |           d u}|sdS t          |           |k    S )NF)	importlibutil	find_specr   )r  r   package_existss      r(   r   r   i  s<    ^--l;;4GN u<  K//r*   c                      t           j                            d          t           j                            d          dS dd l} t	                      s| j        dk     rdS dS )N
fbgemm_gpuzfbgemm_gpu.experimentalFr   z1.2.0T)r  r  r  fbgemm_gpu.experimental.gen_air   r   )r   s    r(   _is_fbgemm_gpu_genai_availabler  q  se     	  ..6>##$=>>Fu))));; :1G;;u4r*   c                   N     e Zd ZdZddej        deej                 f fdZ xZS )r   a  This is used because the TorchAO quantization functions tend to operate on modules so to apply the transform to a tensor, we can load a
    DummyModule with the target tensor and then apply the transformation to the module and then extract the transformed tensor.
    Nr   biasc                 d    t                                                       || _        || _        d S rd   )superr  r   r  )r   r   r  rW  s      r(   r  zDummyModule.__init__  s+    			r*   rd   )	r   r   r   r  r:   r   r
   r  __classcell__)rW  s   @r(   r   r     sc          u| 8EL3I          r*   r   )r"   NN)T)Frd   )r"   N)r  )r  )Lr  r  r   r   rG   r  r   importlib.metadatar   mathr   typingr   r	   r
   r   r:   torch.nn.utils.parametrizer;   rx   r   torch.utils._python_dispatchr   __all__cacher<   r3   r   r   r   rg   r   r   rr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r"  rj  rs  rw  r  r  r  rV  r   r  r  r   r  r  r  r   r   r   r   r  r  r  r  r   r  r   r"   r*   r(   <module>r     sL               				         & & & & & &       0 0 0 0 0 0 0 0 0 0 0 0  0 0 0 0 0 0 0 0 0 0 0 0 D D D D D D  , %(/ c    "5$ 5$ 5$ 5$p
 
 
      U\ u|     " " "S      4 4 4 4n     <         58?      F   85; 4    M M M$5 5 5J J J0 0 0 0 0 0 0 0&  ,  ,X
 X
 X
v@ @ @(% % % %&  ,8 8    :@@*2@@ @ @ @2  &L L L L L L L L^  D; ; ;      V VS VT V V V V      6C 6 6 6 6 6 6? ? ? ?? ? ? ?  0c 0 0 0 0 0  "    %(/     r*   