
    kh0                     ~    d Z ddlZddlmZ ddlmZmZ ddlmZmZ  G d d      Z	 G d	 d
e	e      Z
 G d de
      Zy)z9A group of instances, e.g. Blocks, Lines, Spans, Shapes.
    N   )Element)ITextTextDirection)solve_rects_intersection	graph_bfsc                   |    e Zd ZdZddefdZd Zd Zd Ze	d        Z
e	d	        Zd
 ZdefdZddefdZd Zd Zy)BaseCollectionz1Base collection representing a list of instances.N	instancesc                 J    || _         g | _        | j                  |xs g        y)z)Init collection from a list of instances.N)_parent
_instancesextend)selfr   parents      V/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/common/Collection.py__init__zBaseCollection.__init__   s     IO$    c                 b    	 | j                   |   }|S # t        $ r d| d}t        |      w xY w)NzCollection index z out of range.)r   
IndexError)r   idxr   msgs       r   __getitem__zBaseCollection.__getitem__   sF    	,I
 	  	"%cU.9CS/!	"s    .c                 (    d | j                   D        S )Nc              3       K   | ]  }|  y wN .0instances     r   	<genexpr>z*BaseCollection.__iter__.<locals>.<genexpr>   s     IXIs   )r   r   s    r   __iter__zBaseCollection.__iter__   s    IIIr   c                 ,    t        | j                        S r   )lenr   r"   s    r   __len__zBaseCollection.__len__   s    c$//22r   c                     | j                   S r   )r   r"   s    r   r   zBaseCollection.parent!   s    !\\)r   c           	          t        j                         }| j                  D ]  }||j                  z  } t        j                  |D cg c]  }t	        |d       c}      S c c}w )zbbox of combined collection.r   )fitzRectr   bboxround)r   rectr    xs       r   r+   zBaseCollection.bbox%   sS     yy{ 	"HHMM!D	"yyd3%!*3443s   A"c                 @    |sy | j                   j                  |       y r   )r   appendr   r    s     r   r0   zBaseCollection.append.   s    x(r   c                 :    |sy |D ]  }| j                  |        y r   )r0   )r   r   r    s      r   r   zBaseCollection.extend3   s    &!8H4;;x#88r   c                 >    g | _         | j                  |xs g        | S )zReset instances list.

        Args:
            instances (list, optional): reset to target instances. Defaults to None.

        Returns:
            BaseCollection: self
        )r   r   )r   r   s     r   resetzBaseCollection.reset8   s      IO$r   c                 \    | j                   D cg c]  }|j                          c}S c c}w )z Store attributes in json format.)r   storer1   s     r   r6   zBaseCollection.storeF   s!    26//Ch!CCCs   )c                     t         )z)Construct Collection from a list of dict.)NotImplementedError)r   argskwargss      r   restorezBaseCollection.restoreK   s    !!r   )NNr   )__name__
__module____qualname____doc__listr   r   r#   r&   propertyr   r+   r0   r   r4   r6   r;   r   r   r   r
   r
      si    ;% % J2) ) 5 5)
9t 9
d D
"r   r
   c                       e Zd ZdZed        Zd ZdedefdZddede	d	e	fd
Z
ddede	d	e	fdZdde	d	e	fdZd Zd Zd Zy)
CollectionzACollection of instance focusing on grouping and sorting elements.c                     t        d | j                  D              }t        |      dk(  rt        |      d   S t        j
                  S )z@Get text direction. All instances must have same text direction.c              3   4   K   | ]  }|j                     y wr   text_directionr   s     r   r!   z,Collection.text_direction.<locals>.<genexpr>V   s     Jh())Js   r   r   )setr   r%   r@   r   MIX)r   ress     r   rG   zCollection.text_directionS   s;     J$//JJ"3x{tCy|A0A0AAr   c                    t        | j                        }t        |      D cg c]  }t                }}t	        | j                        D ]X  \  }}t        |dz   |      D ]A  } ||| j                  |         s||   j                  |       ||   j                  |       C Z t        |      }|D cg c].  }| j                  |D cg c]  }| j                  |    c}      0 }}}|S c c}w c c}w c c}}w )a  Group instances according to user defined criterion.

        Args:
            fun (function): with 2 arguments representing 2 instances (Element) and return bool.

        Returns:
            list: a list of grouped ``Collection`` instances.
        
        Examples 1::

            # group instances intersected with each other
            fun = lambda a,b: a.bbox & b.bbox
        
        Examples 2::

            # group instances aligned horizontally
            fun = lambda a,b: a.horizontally_aligned_with(b)
        
        .. note::
            It's equal to a GRAPH searching problem, build adjacent list, and then search graph
            to find all connected components.
        r   )r%   r   rangerH   	enumerateaddr   	__class__)	r   funnumiindex_groupsr    jgroupsgroups	            r   rV   zCollection.groupZ   s    4 $//"',Sz2!22$T__5 	+KAx1Q3_ +x!34 O''* O''*+	+ <(SYZ%$..e!D$//!"4!DEZZ 3 "EZs   C.6C8
C3 	C83C8dxdyc                 j   t        | j                        }t        |      D cg c]  }t                }}g d}}| | ||f}| j                  D ]c  }	t	        |	j
                  |      D 
cg c]
  \  }
}|
|z    }}
}|j                  |||d   f       |j                  |dz   ||d   f       |dz  }e |j                  d        t        |d|z  |       t        |      }|D cg c].  }| j                  |D cg c]  }| j                  |    c}      0 }}}|S c c}w c c}}
w c c}w c c}}w )ai  Collect connected instances into same group.

        Args:
            dx (float): x-tolerances to define connectivity
            dy (float): y-tolerances to define connectivity

        Returns:
            list: a list of grouped ``Collection`` instances.
        
        .. note::
            * It's equal to a GRAPH traversing problem, which the critical point in 
              building the adjacent list, especially a large number of vertex (paths).

            * Checking intersections between paths is actually a Rectangle-Intersection 
              problem, studied already in many literatures.
        r   r      c                     | d   S )Nr   )items    r   <lambda>z2Collection.group_by_connectivity.<locals>.<lambda>   s
    tBx r   key)r%   r   rL   rH   zipr+   r0   sortr   r   rO   )r   rW   rX   rQ   _rS   i_rect_xrR   d_rectr-   abpointsrU   rV   s                  r   group_by_connectivityz Collection.group_by_connectivity   s1   & $//"',Sz2!22 !!#sB#OO 	D%(F%;<caac<F<OOQ&&)45OOQqS&&)45FA		
 	/0 1S5,? <(SYZ%$..e!D$//!"4!DEZZ! 3 = "EZs#   D*D$'D/;D*	D/*D/factorsortedrG   c                     fd}| j                  |      }|r'r| j                  rdnd|j                  fd       |S )z.Group elements into columns based on the bbox.c                 ,    | j                  |      S N)rj   rG   )vertically_align_withrf   rg   rj   rG   s     r   r^   z-Collection.group_by_columns.<locals>.<lambda>   s    !11!FSa1b r      r   c                 "    | j                      S r   r+   rV   r   s    r   r^   z-Collection.group_by_columns.<locals>.<lambda>       %**S/ r   r_   rV   is_vertical_textrb   r   rj   rk   rG   rP   rU   r   s    ` `  @r   group_by_columnszCollection.group_by_columns   sB     cC %$*?*?!QCKK9K:r   c                     fd}| j                  |      }|r'r| j                  rdnd|j                  fd       |S )z+Group elements into rows based on the bbox.c                 ,    | j                  |      S rn   )horizontally_align_withrp   s     r   r^   z*Collection.group_by_rows.<locals>.<lambda>   s    !33AfUc3d r   r   r   c                 "    | j                      S r   rs   rt   s    r   r^   z*Collection.group_by_rows.<locals>.<lambda>   ru   r   r_   rv   rx   s    ` `  @r   group_by_rowszCollection.group_by_rows   sB     eC %$*?*?!QCKK9K:r   c                     d }| j                  |      }|r'|r| j                  rdnd|j                  fd       |S )zGroup lines into physical rows.c                 $    | j                  |      S r   )in_same_row)rf   rg   s     r   r^   z3Collection.group_by_physical_rows.<locals>.<lambda>   s    !--* r   r   r   c                 "    | j                      S r   rs   rt   s    r   r^   z3Collection.group_by_physical_rows.<locals>.<lambda>   ru   r   r_   rv   )r   rk   rG   rP   rU   r   s        @r   group_by_physical_rowsz!Collection.group_by_physical_rows   s?    *C %$*?*?!QCKK9K:r   c                     | j                   r| j                  j                  d        | S | j                  j                  d        | S )zSort collection instances in reading order (considering text direction), e.g.
            for normal reading direction: from top to bottom, from left to right.
        c                     | j                   j                  | j                   j                  | j                   j                  fS r   )r+   y0x0x1es    r   r^   z2Collection.sort_in_reading_order.<locals>.<lambda>   %    		16699affii/P r   r_   c                     | j                   j                  | j                   j                  | j                   j                  fS r   )r+   r   y1r   r   s    r   r^   z2Collection.sort_in_reading_order.<locals>.<lambda>   r   r   )is_horizontal_textr   rb   r"   s    r   sort_in_reading_orderz Collection.sort_in_reading_order   sG     ""OO  %P Q  OO  %P Qr   c                     | j                   s| j                  j                  d        | S | j                  j                  d        | S )zSort collection instances in a physical with text direction considered, e.g.
            for normal reading direction: from left to right.
        c                     | j                   j                  | j                   j                  | j                   j                  fS r   )r+   r   r   r   r   s    r   r^   z/Collection.sort_in_line_order.<locals>.<lambda>   r   r   r_   c                     | j                   j                  | j                   j                  | j                   j                  fS r   )r+   r   r   r   r   s    r   r^   z/Collection.sort_in_line_order.<locals>.<lambda>   r   r   )rw   r   rb   r"   s    r   sort_in_line_orderzCollection.sort_in_line_order   sG     $$OO  %P Q  OO  %P Qr   c                     g }| j                  dd      D ]#  }|j                          |j                  |       % | j                  |       y)ak  Sort instances in reading order, especially for instances in same row. Taking 
        natural reading direction for example: reading order for rows, from left to right 
        for instances in row. In the following example, A comes before B::

                         +-----------+
            +---------+  |           |
            |   A     |  |     B     |
            +---------+  +-----------+
        
        Steps:

            * Sort elements in reading order, i.e. from top to bottom, from left to right.
            * Group elements in row.
            * Sort elements in row: from left to right.
        T)rk   rG   N)r   r   r   r4   )r   r   rows      r   sort_in_reading_order_plusz%Collection.sort_in_reading_order_plus   sP      	..d4.P 	"C""$S!	" 	

9r   N)g        TF)FF)r<   r=   r>   r?   rA   rG   rV   floatri   boolry   r~   r   r   r   r   r   r   r   rC   rC   P   s    KB B&R$u $ $Ne  RV 5 T t 
D 
t 
r   rC   c                   ~    e Zd ZdZdefdZdefdZdedefdZdefdZ	dde
fd	Zd
 Zddej                  de
fdZy)ElementCollectionz$Collection of ``Element`` instances.r   c                 T    | j                   | j                   j                  |       yy)zUpdate parent bbox.N)r   
union_bboxr   r   s     r   _update_bboxzElementCollection._update_bbox  s#    ||#LL##A& $r   c                     |sy| j                   j                  |       | j                  |       | j                  | j                  |_        yy)zAppend an instance, update parent's bbox accordingly and set the parent of the added instance.

        Args:
            e (Element): instance to append.
        N)r   r0   r   r   r   r   s     r   r0   zElementCollection.append
  sB     &q!! ||#QX#r   nthc                     |sy| j                   j                  ||       | j                  |       | j                  |_        y)zInsert a Element and update parent's bbox accordingly.

        Args:
            nth (int): the position to insert.
            e (Element): the instance to insert.
        N)r   insertr   r   r   )r   r   r   s      r   r   zElementCollection.insert  s6     &sA&!<<r   c                 8    | j                   j                  |      S )zDelete the ``nth`` instance.

        Args:
            nth (int): the position to remove.

        Returns:
            Collection: the removed instance.
        )r   pop)r   r   s     r   r   zElementCollection.pop%  s     ""3''r   line_separate_thresholdc                 t   |s| j                   ryt        |       dk  ryt        | j                               dkD  ry| j                  rdnd\  }}| j	                  d      D ]U  }t        dt        |            D ];  }t        ||   j                  |   ||dz
     j                  |   z
        }||k\  s:  y W y)z5Whether contained elements are in flow layout or not.Fr   T)r   rZ   )rq   r   rF   )rw   r%   ry   r   r   rL   absr+   )r   r   cell_layoutidx0idx1r   rR   diss           r   is_flow_layoutz ElementCollection.is_flow_layout1  s     t44 t9a<t$$&')%  $66VF
d..d.C 	@C1c#h' @#a&++d+C!HMM$,??@11%@	@
 r   c                 j    t        t        fd| j                              }| j                  |      S )zvFilter instances contained in target bbox.

        Args:
            bbox  (fitz.Rect): target boundary box.
        c                 :    j                  | j                        S r   )containsr+   )r   r+   s    r   r^   z5ElementCollection.contained_in_bbox.<locals>.<lambda>M  s    dmmAFF+ r   )r@   filterr   rO   )r   r+   r   s    ` r   contained_in_bboxz#ElementCollection.contained_in_bboxF  s.     +T__> ?	~~i((r   r+   	thresholdc                    g g }}| j                   D ]  }|j                  |z  }|j                  r|j                  |       0t	        |j                         |j                  j                         z  d      }||k\  r|j                  |       ||j                  |        | j                  |      | j                  |      fS )aH  Split instances into two groups: one intersects with ``bbox``, the other not.

        Args:
            bbox (fitz.Rect): target rect box.
            threshold (float): It's intersected when the overlap rate exceeds this threshold. Defaults to 0.

        Returns:
            tuple: two group in original class type.
        rZ   )r   r+   is_emptyr0   r,   get_arearO   )r   r+   r   intersectionsno_intersectionsr    intersectionrj   s           r   split_with_intersectionz)ElementCollection.split_with_intersectionQ  s     +-b' 
	6H#==4/L$$ ''1|446x}}7M7M7OOQRSY&!((2$++H5
	6 ~~m,dnn=M.NNNr   N)F)gMbP?)r<   r=   r>   r?   r   r   r0   intr   r   r   r   r   r)   r*   r   r   r   r   r   r     sg    .'W '=w =
  
  
 	(c 	(U *)O499 O Or   r   )r?   r)   r   sharer   r   	algorithmr   r   r
   rC   r   r   r   r   <module>r      sF      ) <A" A"Hm mbfO
 fOr   