
    kh%                         d Z ddlZddlZddlZddlZddlZddlmZ	 dZ
	 defdZdefdZdefdZdefdZdefdZd	 Z ej&                         d
        Zd Zd ZddZddZd ZddZddZddZy)z PyTorch distributed helpers

Some of this lifted from Detectron2 with other fns added by myself. Some of the Detectron2 fns
were intended for use with GLOO PG. I am using NCCL here with default PG so not everything will work
as is -RW
    Nreturnc                  ~    t        j                         syt        j                         syt        j                         S )N   )distis_availableis_initializedget_world_size     N/var/www/teggl/fontify/venv/lib/python3.12/site-packages/effdet/distributed.pyr	   r	      s/       r   c                  ~    t        j                         syt        j                         syt        j                         S Nr   )r   r   r   get_rankr
   r   r   r   r      s,     ==?r   c                      t        j                         syt        j                         syt        J t        j                  t              S )zh
    Returns:
        The rank of the current process within the local (per-machine) process group.
    r   group)r   r   r   _LOCAL_PROCESS_GROUPr   r
   r   r   get_local_rankr   %   s>    
  +++==344r   c                      t        j                         syt        j                         syt        j                  t              S )zw
    Returns:
        The size of the per-machine process group,
        i.e. the number of processes per machine.
    r   r   )r   r   r   r	   r   r
   r   r   get_local_sizer   2   s4      %9::r   c                      t               dk(  S r   )r   r
   r   r   is_main_processr   ?   s    :?r   c                      t        j                         syt        j                         syt        j                         } | dk(  ryt        j                          y)zj
    Helper function to synchronize (barrier) among all processes when
    using distributed training
    Nr   )r   r   r   r	   barrier)
world_sizes    r   synchronizer   C   sC    
  $$&JQLLNr   c                      t        j                         dk(  rt        j                  d      S t         j                  j                  S )zj
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    ncclgloo)backend)r   get_backend	new_groupr   WORLDr
   r   r   _get_global_gloo_groupr$   R   s4     V#~~f--zzr   c                    t        j                  |      }|dv sJ t        j                  |dk(  rdnd      }t	        j
                  |       }t        |      dkD  rOt        j                  t              }|j                  dj                  t               t        |      dz  |             t        j                  j                  |      }t        j                  |      j!                  |      }|S )N)r   r   r   cpucudai   @z;Rank {} trying to all-gather {:.2f} GB of data on device {})device)r   r!   torchr(   pickledumpslenlogging	getLogger__name__warningformatr   ByteStoragefrom_buffer
ByteTensorto)datar   r    r(   bufferloggerstoragetensors           r   _serialize_to_tensorr;   ^   s    u%G&&&&\\7f#4%&AF\\$F
6{Y""8,IPP
CK95v	

 ++F3Gg&)))8FMr   c                    t        j                  |      }|dk\  sJ d       t        j                  | j	                         gt        j
                  | j                        }t        |      D cg c]3  }t        j                  dgt        j
                  | j                        5 }}t        j                  |||       |D cg c]  }t        |j                                }}t        |      }||k7  rMt        j                  ||z
  ft        j                  | j                        }t        j                  | |fd      } || fS c c}w c c}w )zz
    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    r   r   zHcomm.gather/all_gather must be called from ranks within the given group!dtyper(   r   dim)r   r	   r)   r:   numelint64r(   rangezeros
all_gatherintitemmaxuint8cat)	r:   r   r   
local_size_	size_listsizemax_sizepaddings	            r   _pad_to_largest_tensorrQ   p   s    $$51JaRQRv||~.ekk&--XJKPQ[K\FGQCu{{6==AI  	OOIz7.78dTYY[!8I89~H X++x*46ekkRXR_R_`FG,!4f 9s   /8E Ec                 H   t               dk(  r| gS |
t               }t        j                   |      dk(  r| gS t        | |      }t	        ||      \  }}t        |      }|D cg c]3  }t        j                  |ft        j                  |j                        5 }}t        j                  |||       g }t        ||      D ]X  \  }}|j                         j                         j                         d| }	|j                  t!        j"                  |	             Z |S c c}w )a;  
    Run all_gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: list of data gathered from each rank
    r   Nr=   r   )r	   r$   r   r;   rQ   rH   r)   emptyrI   r(   rE   zipr&   numpytobytesappendr*   loads)
r6   r   r:   rM   rO   rL   tensor_list	data_listrN   r7   s
             r   rE   rE      s
    1v}&(5!Q&v!$.F.vu=Iv9~H _hhYZ5;;{%++fmmThKhOOKu5II{3 /f##%--/6f-./  is   "8Dc                    t               dk(  r| gS |
t               }t        j                   |      dk(  r| gS t        j                  |      }t	        | |      }t        ||      \  }}||k(  rt        |      }|D cg c]3  }t        j                  |ft        j                  |j                        5 }}t        j                  ||||       g }	t        ||      D ]X  \  }
}|j                         j                         j                         d|
 }|	j!                  t#        j$                  |             Z |	S t        j                  |g ||       g S c c}w )a  
    Run gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    r   Nr   r=   dstr   )r	   r$   r   r   r;   rQ   rH   r)   rS   rI   r(   gatherrT   r&   rU   rV   rW   r*   rX   )r6   r]   r   rankr:   rM   rO   rL   rY   rZ   rN   r7   s               r   r^   r^      s6    1v}&('1,v==u%D!$.F.vu=Iv s{y>bkl]^u{{H;ekk&--XllFKS>		;7 	3LD&ZZ\'')113ET:FV\\&12	3 FBCu5	 ms   >8Ec                  `    t         j                  j                  d      } t        |       }|d   S )z
    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.
    All workers must call this function, otherwise it will deadlock.
    l        r   )nprandomrandintrE   )intsall_intss     r   shared_random_seedrf      s,     99W%D$HA;r   c                    t               }|dk  r| S t        j                         5  g }g }t        | j	                               D ]'  }|j                  |       |j                  | |          ) t        j                  |d      }t        j                  |d       t        j                         dk(  r|r||z  }t        ||      D ci c]  \  }}||
 }}}ddd       |S c c}}w # 1 sw Y   S xY w)ac  
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the reduced results.
    Args:
        input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
        average (bool): whether to do average or sum
    Returns:
        a dict with the same keys as input_dict, after reduction.
       r   r?   )r]   N)r	   r)   no_gradsortedkeysrW   stackr   reducer   rT   )
input_dictaverager   namesvalueskvreduced_dicts           r   reduce_dictru      s      !JA~	 =
)* 	)ALLOMM*Q-(	) V+F"==?aG j F),UF);<A1<<=  == s   B#C)	C#C)#C))C3c                    xs t         j                  j                  t        j                        fd}t	        | t
              r1t               }| j                         D ]  \  }} ||      }|||<    |S t	        | t        t        f      r2| D cg c]
  } ||       }}t	        | t              rt        |      }|S t	        | t        j                        sJ  ||       S c c}w )Nc                     t              D cg c]  }t        j                  |        }}t        j                  ||        t        j
                  |      S c c}w )Nr   r?   )rC   r)   
empty_liker   rE   rJ   )r:   rL   rY   cat_dimr   r   s      r   
_do_gatherz(all_gather_container.<locals>._do_gather  sM    9>z9JKAu''/KKV59yy'22 Ls   A)r   r   r#   r	   
isinstancedictitemslisttupler)   Tensor)	containerr   ry   rz   gatheredrr   rs   r   s    ``    @r   all_gather_containerr      s    %TZZ%%E$$U+J3
 )T"6OO% 	DAq1AHQK	 	Ie}	-+45aJqM55i'XH )U\\222)$$ 6s   C+c                   	 xs t         j                  j                  t        j                        	t        j                        	fd}t        | t              r1t               }| j                         D ]  \  }} ||      }|||<    |S t        | t        t        f      r2| D cg c]
  } ||       }}t        | t              rt        |      }|S t        | t        j                        sJ  ||       S c c}w )Nc                     k(  r,t              D cg c]  }t        j                  |        }}nd }t        j                  | |       t        j
                  |      S c c}w )Nr\   r?   )rC   r)   rx   r   r^   rJ   )r:   rL   rY   ry   r]   r   	this_rankr   s      r   rz   z$gather_container.<locals>._do_gather  s\    =B:=NO5++F3OKOKFKS>yy'22	 Ps   A%)r   r   r#   r	   r   r{   r|   r}   r~   r   r)   r   )
r   r]   r   ry   rz   r   rr   rs   r   r   s
    ```    @@r   gather_containerr     s    %TZZ%%E$$U+Je$I3 3 )T"6OO% 	DAq1AHQK	 	Ie}	-+45aJqM55i'XH )U\\222)$$ 6s   2D)N)r   N)Tr   )__doc__	functoolsr-   rU   ra   r*   r)   torch.distributeddistributedr   r   rF   r	   r   r   r   boolr   r   	lru_cacher$   r;   rQ   rE   r^   rf   ru   r   r   r
   r   r   <module>r      s           ! !# 
5 
5
; 
;     $6D$N
<%4%r   