
    `i;                     `   U d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZd dl	m
Z
mZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d	l&m'Z' d d
l(m)Z)  ej*        e+          Z,ej-        e.d<   ddgZ/ G d de          Z0 G d de
          Z1dS )    N)AnyOptional)FileSystemReaderFileSystemWriter)consolidate_safetensors_files)_gen_file_name_HFStorageInfo_metadata_fnCUSTOM_METADATA_KEYSAVED_OFFSETS_KEYSHARDED_DIR_NAMESUFFIX)SerializationFormat)ChunkStorageMetadataMetadataMetadataIndexStorageMetaTensorPropertiesTensorStorageMetadata)LoadPlanLoadPlannerReadItemSavePlanSavePlanner	WriteItem)WriteResult)FutureloggerHuggingFaceStorageWriterHuggingFaceStorageReaderc                   `    e Zd ZdZ	 	 	 	 	 ddedeeeef                  deded	ed
eddf fdZ	de
e         de
e         fdZdededee
e                  f fdZdede
e
e                  ddfdZdeeeef                  de
e         deee
e         f         fdZedefd            Z xZS )r   zP
    A writer that writes to storage in the huggingface safetensors format.
    N   Fpathfqn_to_index_mappingthread_countsave_distributedenable_consolidationthread_count_consolidationreturnc                 >   t                                          |t          j        |           || _        || _        || _        d| _        | j        rCt          | j	                  | _        | j
                            | j	        t                    | _	        || _        dS )a  
        Initialize the huggingface writer pointing to path.

        Args:
            path: directory where the checkpoint will be read from.
            fqn_to_index_mapping: A mapping from tensor FQN to the index of the file that the tensor should be written to.
                              Indices are from 1 to N, where N is the number of files. If not provided,
                              the tensors will be written to a single file. If none, then all the tensors on the
                              same rank will be written to the same file.
            thread_count: Number of threads to use to write distributed checkpoint. Default to 1.
            save_distributed: If True, save the checkpoint using distributed APIs where every rank saves its own shard.
                        Default is False which assumes rank-0 checkpointing of the full state_dict.
            enable_consolidation: If True, consolidate the sharded checkpoint after saving. The sharded tensors will be
                                saved to path/sharded and the full tensors will be saved to path. Default to False.
            thread_count_consolidation: Number of threads to use for parallel processing of saving data
                                to consolidated output files. Default to 1.
        )r#   serialization_formatr%   N)super__init__r   SAFETENSORSr$   r&   r'   consolidated_output_pathstrr#   fsconcat_pathr   r(   )selfr#   r$   r%   r&   r'   r(   	__class__s          {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/distributed/checkpoint/hf_storage.pyr-   z!HuggingFaceStorageWriter.__init__6   s    6 	!4!@% 	 	
 	
 	

 ?S!&6*>!7;%$ 	I,/	NND)++DI7GHHDI*D'''    plansc                     g }t          |d          D ]M\  }}i }| j        
| j        |d<   | j        r||d<   |                    t	          j        ||                     N|S )Nr"   )startr$   shard_index)storage_data)	enumerater$   r&   appenddataclassesreplace)r3   r7   	new_plansiplanr;   s         r5   prepare_global_planz,HuggingFaceStorageWriter.prepare_global_plan_   s    	 a000 	S 	SGAt+-L(47;7P34$ 0./]+[0LQQQRRRRr6   rB   plannerc                 p   t          |j                  dk    r%t                      }|                    g            |S |j        }d }d }d|v r|d         }d|v r|d         }|                     ||j                  }|!t          |                                          nd}t          j	                    }	|                                D ]L\  }
}t          |
||          }|	                    | j                            | j        |          ||f           Mt                                          ||	          S )Nr   r$   r:   r"   )lenitemsr   
set_resultr;   _split_by_storage_planmaxvaluesqueueQueuer   putr1   r2   r#   r,   _write_data)r3   rB   rD   futr;   storage_planr:   bucketshighest_index
file_queue
file_indexwrite_items	file_namer4   s                r5   
write_dataz#HuggingFaceStorageWriter.write_datal   s?   
 tz??a ((CNN2J (,'815%)!\11'(>?LL((&}5K--lDJGG6B6NL//11222TU"'+--
'.}} 	 	#J&z=+NNINN$$TY	::I{S    ww""7J777r6   metadataresultsc                    | j         r#| j        st                              d           d S | j         ro| j        | j        n1t
                              |j                                        d          }t          t          | j                  | j        | j        |          S i }i }d}|D ]=}|                    d |D                        |t          d |D                       z  }>d|i|d<   ||d	<   | j                            | j        t$                     }| j                            |d
          5 }	t)          j        ||	d           d d d            d S # 1 swxY w Y   d S )Nz4Not consolidating sharded checkpoint in finish step.r"   )	input_dir
output_dirnum_threadsr$   r   c                 <    i | ]}|j         j        |j        j        S  )indexfqnr;   relative_path.0wrs     r5   
<dictcomp>z3HuggingFaceStorageWriter.finish.<locals>.<dictcomp>   s#    OOOr<OOOr6   c                 &    g | ]}|j         j        S r`   )r;   lengthrd   s     r5   
<listcomp>z3HuggingFaceStorageWriter.finish.<locals>.<listcomp>   s    HHH"r5HHHr6   
total_sizerY   
weight_mapw   )indent)r&   r'   r   infor$   dictfromkeysstate_dict_metadatakeysr   r0   r#   r/   r(   updatesumr1   r2   r
   create_streamjsondump)
r3   rY   rZ   r$   metadata_to_write
storage_mdrk   wr_listmetadata_pathmetadata_files
             r5   finishzHuggingFaceStorageWriter.finish   s     	)B 	
 KKNOOOF  	 ,8 ))]]8#?#D#D#F#FJJ ! 1di..8 ;%9	    

 	J 	JGOOwOOO   #HHHHHIIIJJ)5z(B*%*4,'++DI,7HIIW""=#66 	B-I'qAAAA	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	Bs   4EE ErQ   rG   c                     |d|iS i }|D ]<}|j         j        }||         }||vr|g||<   !||                             |           =|S )Nr"   )ra   rb   r=   )r3   rQ   rG   rR   itemkeyidxs          r5   rI   z/HuggingFaceStorageWriter._split_by_storage_plan   ss     u: 	* 	*D*.Cs#C'!! $v##D))))r6   c                     t           S N)r
   )r3   s    r5   r}   z&HuggingFaceStorageWriter.metadata_path   s    r6   )Nr"   FFr"   )__name__
__module____qualname____doc__r0   r   rq   intboolr-   listr   rC   r   r   r   rX   r   r   r   rI   propertyr}   __classcell__r4   s   @r5   r   r   1   s         :>!&%**+'E 'E'E 'tCH~6'E 	'E
 'E #'E %('E 
'E 'E 'E 'E 'E 'ERh DN    88 8 
[!	"	8 8 8 8 8 8>%Bx %B$tK7H2I %Bd %B %B %B %BN$T#s(^4=A)_	c4	?"	#   & s    X    r6   c                        e Zd ZdZddededdf fdZded	eddfd
Z	de
j        de
j        d	eddfdZded	eded         fdZdefdZ xZS )r    zQ
    A reader that reads a checkpoint in the huggingface safetensors format.
    r"   r#   r%   r)   Nc                 Z    t                                          |           || _        dS )z
        Initialize the huggingface reader pointing to path.

        Args:
            path: directory where the checkpoint will be read from.
            thread_count: Number of threads to use to read distributed checkpoint. Default to 1.
        )r#   N)r,   r-   r%   )r3   r#   r%   r4   s      r5   r-   z!HuggingFaceStorageReader.__init__   s.     	d###(r6   reqrD   c                 "   t          d t          |j        |j                  D                       }|                    |j        j                  |         }|                    |                                          }|	                                |	                                k    s<J d|j         d|	                                 d|	                                             |
                    |           |                    ||           dS )z1Helper function to process a single read request.c              3   B   K   | ]\  }}t          |||z             V  d S r   )slice)re   offsetri   s      r5   	<genexpr>zAHuggingFaceStorageReader._process_read_request.<locals>.<genexpr>   sH       
 
 &&6/**
 
 
 
 
 
r6   zreq z mismatch sizes z vs N)tuplezipstorage_offsetslengths	get_slicestorage_indexrb   resolve_tensordetachsizecopy_commit_tensor)r3   fr   rD   slicestensortarget_tensors          r5   _process_read_requestz.HuggingFaceStorageReader._process_read_request   s     
 
"%c&93;"G"G
 
 
 
 
 S.233F;..s33::<<!!##v{{}}444_3$__m6H6H6J6J__PVP[P[P]P]__ 544 	F###c=11111r6   rT   result_queuec                    ddl m} 	 	 |                                \  }} ||d          5 }|D ]}|                     |||           	 d d d            n# 1 swxY w Y   |                    d           o# t
          j        $ r Y d S w xY w)Nr   	safe_openTptfilename	framework)safetensorsr   
get_nowaitr   rN   rL   Empty)	r3   rT   r   rD   r   rW   reqsr   r   s	            r5   _read_files_from_queuez/HuggingFaceStorageReader._read_files_from_queue   s    	*)))))	'","7"7"9"9	4Y	TBBB Da# D D221c7CCCCDD D D D D D D D D D D D D D D   &&&' { 	 	 	DD	s4   %A8 AA8 AA8 AA8 8B
BrB   c                    ddl m} i }|j        D ]D}| j        |j                 }|j        }|                    |g                               |           E| j        dk    st          |          dk    r^|                                D ]G\  }} ||d          5 }	|D ]}
| 
                    |	|
|           	 d d d            n# 1 swxY w Y   HnPt          j                    }t          j                    }|                                D ]\  }}|                    ||f           g }t          | j        t          |                    }t          |          D ]I}t!          j        | j        |||f          }|                                 |                    |           J|D ]}|                                 d}	 	 |                                 |dz  }# t          j        $ r Y nw xY w|t          |          k    sJ d| d	t          |                       t/                      }|                    d            |S )
Nr   r   r"   r   r   )targetargsTzNot all files were processed: z out of )r   r   rG   r;   r   rc   
setdefaultr=   r%   rF   r   rL   rM   rN   minrange	threadingThreadr   r9   joinr   r   r   rH   )r3   rB   rD   r   per_file	read_itemitem_mdrW   r   r   r   rT   r   threadsr^   _tprocessed_countrP   s                      r5   	read_dataz"HuggingFaceStorageReader.read_data   s   )))))).0 	A 	AI&*&7	8O&PG-I	2..55i@@@@!!S]]a%7%7#+>>#3#3 D D	4Y	TBBB Da# D D221c7CCCCDD D D D D D D D D D D D D D DD ',kmmJ(-L $,>>#3#3 2 2	4	401111 Gd/X??K;'' " "$6$lG<   			q!!!!     O) ++---#q(O) ;    #c(mm333YYY#h--YY 433 hht
s$   CC	
C	G G.-G.c                 D   ddl m} ddlm} i }i }g }| j                            | j                  D ]1}|                    t                    r|	                    |           2|D ]} ||d          5 }|
                                }	|                                }
d }|
rF|
                    t                    r,t          j        |
                    t                              }|	D ]S}|                    |                                          }|                    |                                          }|||         t&                   }ndgt)          |          z  }||vrt+          t-           ||                    t/          j        d t3          ||          D                       t5          t/          j        |          t/          j        |                    g	          ||<   n||         j        	                    t5          t/          j        |          t/          j        |          
                     t9          ||         j                  }t=          t)          |                    D ]*}t?          ||         ||         ||         z             ||<   +t/          j        |          ||         _        |#tA          |||         t&                             }n"tA          |dgt)          |          z            }tC          |t/          j        |           ||                    ||<   U	 d d d            n# 1 swxY w Y   tE          ||          }tG          |dd           tI                      |_%        | j&        |j%        _&        |S )Nr   r   )	_getdtyper   )r   )dtypec                     g | ]
\  }}||z   S r`   r`   )re   savedr   s      r5   rj   z:HuggingFaceStorageReader.read_metadata.<locals>.<listcomp>Z  s      X X XME6 X X Xr6   )offsetssizes)
propertiesr   chunks)r   )rb   r   )rc   shaper   )rs   r;   storage_meta)'r   r   safetensors.torchr   r1   lsr#   endswithr   r=   rt   rY   getr   rx   loadsr   	get_shape	get_dtyper   rF   r   r   torchSizer   r   r   r   r   r   rJ   r   r	   r   getattrr   r   load_id)r3   r   r   rs   r;   safetensors_filesfilesafetensor_filer   rt   extra_metadatadcp_sharding_infor   r   r   r   r   rA   metadata_indexrY   s                       r5   read_metadataz&HuggingFaceStorageReader.read_metadata6  s   ))))))//////@B<>GJJty)) 	/ 	/D}}V$$ /!((...0 7	 7	O?d;;; 6qvvxx!"$(!! n&8&89L&M&M (,
&**+>??) )%   , ,CKK,,6688EKK,,6688E(4!23!78I!J"#s5zz!1"5553H'7ii>N>N'O'O'O!& X XSPVEWEW X X X" " !5,1Jv,>,>*/*U*;*;!" !" !"$4 4 4+C00 ,C07>>0 %
6 2 2%*U:K:K    
  $$7$<$ABB!&s4yy!1!1 I IA&)$q'58fQi3G&H&HDGG8=
48H8H+C05 )4)6 #,=c,BCT,U* * * *73sSQVZZGW)X)X)X3A&5#j//'i..4 4 4L00Q,6 6 6 6 6 6 6 6 6 6 6 6 6 6 6p  3%
 
 

 8^T22:$/MMH!(,%s   6K
MM	M	)r"   )r   r   r   r   r0   r   r-   r   r   r   rL   rM   r   r   r   r   r   r   r   r   s   @r5   r    r       s        
) 
)S 
) 
)D 
) 
) 
) 
) 
) 
)2H 2{ 2t 2 2 2 2"K k 	
 
   $6h 6 6 6 6 6 6pNx N N N N N N N Nr6   )2r>   rx   loggingrL   r   typingr   r   r   torch.distributed.checkpointr   r   8torch.distributed.checkpoint._consolidate_hf_safetensorsr   &torch.distributed.checkpoint._hf_utilsr   r	   r
   r   r   r   r   'torch.distributed.checkpoint.filesystemr   %torch.distributed.checkpoint.metadatar   r   r   r   r   r   $torch.distributed.checkpoint.plannerr   r   r   r   r   r   $torch.distributed.checkpoint.storager   torch.futuresr   	getLoggerr   r   Logger__annotations____all__r   r    r`   r6   r5   <module>r      sf                                K K K K K K K K                       H G G G G G                               = < < < < <             +*844 4 4 4%'A
BV V V V V/ V V Vrz z z z z/ z z z z zr6   