
    kh;                     H   d dl mZ d dlZd dlZdedefdZd Zd Z	de
d	ed
e
fdZde
de
d
e
fdZdeded
e
fdZ	 	 d#dej                  dedededef
dZdej                  dedefdZdej                  dededefdZd$dej                  dej                  ded ed!ef
d"Zy)%    )dequeNbbox_1bbox_2c                     | \  }}}}|\  }}}}	||z
  ||z
  z   t        ||      t        ||      z
  z
  }
|
dk  ry||z
  |	|z
  z   t        ||	      t        ||      z
  z
  }|dk  ry|
|z  S )Nr   )maxmin)r   r   x0y0x1y1u0v0u1v1whs               U/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/common/algorithm.pyget_arear   	   s    NBBNBB 
B2b5SR[R45A!tA 
B2b5SR[R45A!tAQ3J    c                     t               }g }t        t        |             D ]>  }||v rt        t        | |            }|j	                  |       |j                  |       @ |S )zBreadth First Search graph (may be disconnected graph).
    
    Args:
        graph (list): GRAPH represented by adjacent list, [set(1,2,3), set(...), ...]
    
    Returns:
        list: A list of connected components
    )setrangelen_graph_bfs_from_nodeappendupdate)graphcounted_indexesgroupsiindexess        r   	graph_bfsr"      se     eOF3u: (*5!45gw'( Mr   c              #      K   t               }t               }|j                  |       |rI|j                         }||v r| |j	                  |       | |   D ]  }|j                  |        |rHyyw)zBreadth First Search connected graph with start node.
    
    Args:
        graph (list): GRAPH represented by adjacent list, [set(1,2,3), set(...), ...].
        start (int): Index of any start vertex.
    N)r   r   r   popleftadd)r   startsearch_queuesearchedcur_nodenodes         r   r   r   2   sv      7LuH
'')xX(O 	&D%	& s   A/A42A4Vnumindex_groupsc                   
 |dk  ryt        |dz        }| d   d   | |dz
     d   | d   d   c
| d| }| |d }t        t        
fd|            }t        t        fd|            }t        t        
fd	|            }t        t        fd
|            }	t        |||       t        |	||       t        ||	|       t	        |||       t	        |||z
  |       y)u[  Implementation of solving Rectangle-Intersection Problem.

    Performance::

        O(nlog n + k) time and O(n) space, where k is the count of intersection pairs.

    Args:
        V (list): Rectangle-related x-edges data, [(index, Rect, x), (...), ...].
        num (int): Count of V instances, equal to len(V).
        index_groups (list): Target adjacent list for connectivity between rects.
    
    Procedure ``detect(V, H, m)``::
    
        if m < 2 then return else
        - let V1 be the first ⌊m/2⌋ and let V2 be the rest of the vertical edges in V in the sorted order;
        - let S11 and S22 be the set of rectangles represented only in V1 and V2 but not spanning V2 and V1, respectively;
        - let S12 be the set of rectangles represented only in V1 and spanning V2; 
        - let S21 be the set of rectangles represented only in V2 and spanning V1
        - let H1 and H2 be the list of y-intervals corresponding to the elements of V1 and V2 respectively
        - stab(S12, S22); stab(S21, S11); stab(S12, S21)
        - detect(V1, H1, ⌊m/2⌋); detect(V2, H2, m − ⌊m/2⌋)
       Ng       @r      c                     | d   d   k  S Nr1   r/    itemXs    r   <lambda>z*solve_rects_intersection.<locals>.<lambda>z   s    DGAJM r   c                     | d   d   k\  S r3   r4   )r6   X1s    r   r8   z*solve_rects_intersection.<locals>.<lambda>{       DGAJN r   c                     | d   d   kD  S Nr1   r   r4   r5   s    r   r8   z*solve_rects_intersection.<locals>.<lambda>|   s    DGAJqL r   c                     | d   d   k  S r=   r4   )r6   X0s    r   r8   z*solve_rects_intersection.<locals>.<lambda>}   r;   r   )intlistfilter_stabsolve_rects_intersection)r+   r,   r-   
center_posleftrightS11S12S22S21r7   r?   r:   s             @@@r   rD   rD   X   s    . Qw SWJ!R!JqL/"-quRyIB2 Qz?DjkNE v149
:C
v2D:
;C
v0%9
:C
v2E;
<C 
#sL!	#sL!	#sL! TJLAUC
NLAr   S1S2c                    | r|sy| j                  d        |j                  d        d\  }}|t        |       k  r:|t        |      k  r*| |   \  }}}||   \  }}	}|d   |	d   k  rw|}
|
t        |      k  ra||
   d   d   |d   k  rPt        t        |dz        t        ||
   d	   dz        |       |
dz  }
|
t        |      k  r||
   d   d   |d   k  rP|dz  }nv|}
|
t        |       k  ra| |
   d   d   |	d   k  rPt        t        | |
   d	   dz        t        |dz        |       |
dz  }
|
t        |       k  r| |
   d   d   |	d   k  rP|dz  }|t        |       k  r|t        |      k  r(yyyy)
u  Check interval intersection in y-direction.
    
    Procedure ``stab(A, B)``::
        i := 1; j := 1
        while i ≤ |A| and j ≤ |B|
            if ai.y0 < bj.y0 then
            k := j
            while k ≤ |B| and bk.y0 < ai.y1
                reportPair(air, bks)
                k := k + 1
            i := i + 1
            else
            k := i
            while k ≤ |A| and ak.y0 < bj.y1
                reportPair(bjs, akr)
                k := k + 1
            j := j + 1
    Nc                     | d   d   S Nr1   r4   r6   s    r   r8   z_stab.<locals>.<lambda>       T!WQZ r   )keyc                     | d   d   S rP   r4   rQ   s    r   r8   z_stab.<locals>.<lambda>   rR   r   r   r   r1      r/   r   )sortr   _report_pairr@   )rL   rM   r-   r    jma_nbks              r   rC   rC      s   & R GG'G(GG'G(DAq
CG)#b'	Q%1aQ%1aQ4!A$;ACG)1aad 2S1Xs2a58A:EQ CG)1aad 2 FAACG)1aad 2SAq!_c!A#hEQ CG)1aad 2 FA CG)#b'	)	)r   r    rY   c                 T    ||    j                  |       ||   j                  |        y)z add pair (i,j) to adjacent list.N)r%   )r    rY   r-   s      r   rX   rX      s&    OOr   
img_binarymin_wmin_hmin_dxmin_dyc                     dt         j                  dt        dt        dt        dt        dt        dt        ffdg } | d	|||||
       |S )a  Split image with recursive xy-cut algorithm.
    
    Args:
        img_binary (np.array): Binarized image with interesting region (255) and empty region (0).
        min_w (float): Ignore bbox if the width is less than this value.
        min_h (float): Ignore bbox if the height is less than this value.
        min_dx (float): Merge two bbox-es if the x-gap is less than this value.
        min_dy (float): Merge two bbox-es if the y-gap is less than this value.
    
    Returns:
        list: bbox (x0, y0, x1, y1) of split blocks.
    arrtop_leftresrb   rc   rd   re   c                    |\  }}| j                   \  }	}
t        j                  | dk(  d      }t        |||      }|sy |\  }}t	        ||      D ]  \  }}| ||d|
f   }t        j                  |dk(  d      }t        |||      }|s;|\  }}t        |      dk(  r(|j                  ||d   z   ||z   ||d   z   ||z   f       vt	        ||      D ](  \  }}| ||||f   }||z   ||z   f} |||||||       *  y )N   r1   axisr   )shapenpcount_nonzero_split_projection_profilezipr   r   )rg   rh   ri   rb   rc   rd   re   r	   r
   r   r   
projectionpos_yarr_y0arr_y1r0r1x_arrpos_xarr_x0arr_x1c0c1y_arrxy_cuts                           r   r   z recursive_xy_cut.<locals>.xy_cut   sF   Byy1%%c3hQ7
)*eVDf &&) 	KFB2qs
OE))%*1=J-j%HE( #NFF6{A~

BvayL"R%F1Ir"uEF ff- KBBrE2b5L)rE2b5>uhUE66JK	Kr   rU   )rg   rh   ri   rb   rc   rd   re   )ro   arraytuplerA   float)ra   rb   rc   rd   re   ri   r   s         @r   recursive_xy_cutr      sm    K288 Ke K KK',K5:KCHK< C
zFuVFDJr   
arr_values	min_valuemin_gapc                 .   t        j                  | |kD        d   }t        |      sy|dd |dd z
  }t        j                  ||kD        d   }||   }||dz      }t        j                  |d|d         }t        j                  ||d         }	|	dz  }	||	fS )uU  Split projection profile:

    ```
                              ┌──┐
         arr_values           │  │       ┌─┐───
             ┌──┐             │  │       │ │ |
             │  │             │  │ ┌───┐ │ │min_value
             │  │<- min_gap ->│  │ │   │ │ │ |
         ────┴──┴─────────────┴──┴─┴───┴─┴─┴─┴───
         0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
    ```

    Args:
        arr_values (np.array): 1-d array representing the projection profile.
        min_value (float): Ignore the profile if `arr_value` is less than `min_value`.
        min_gap (float): Ignore the gap if less than this value.

    Returns:
        tuple: Start indexes and end indexes of split groups.
    r   Nr1   r0   )ro   wherer   insertr   )
r   r   r   	arr_indexarr_diffarr_diff_indexarr_zero_intvl_startarr_zero_intvl_end	arr_startarr_ends
             r   rq   rq      s    , I-.q1Iy>6
 }y2.HXXhw./2N$^4">!#34 		,a1>Iii,im<GqLGgr   bboxc                    |\  }}}}t        j                  | j                  t         j                        }| ||||f   |||||f<   t	        j
                  |t        j                  t        j                        \  }	}
t        j                  |
ddddf   dk(        d   }t        j                  t        j                  |
ddddf   |            d   }t        j                  t        j                  |
ddddf   |            d   }d g g g c}}|D ]E  }t	        j                  |	|         \  }}}}||k  s||k  r+|j                  ||||z   ||z   f       G |D ];  }|D ]4  }||k(  r	 ||      sj                  |       |j                  |       6 = fd}|D ]P  }t	        j                  |	|         \  }}}}||||z   ||z   f}||k  s||k  r7 ||      r@|j                  |       R |S )a  Inner contours of current region, especially level 2 contours of the default opencv tree hirerachy.

    Args:
        img_binary (np.array): Binarized image with interesting region (255) and empty region (0).
        bbox (tuple): The external bbox.
        min_w (float): Ignore contours if the bbox width is less than this value.
        min_h (float): Ignore contours if the bbox height is less than this value.

    Returns:
        list: A list of bbox-es of inner contours.
    dtyper   NrV   r0   c                 R    | \  }}}}|\  }}}}	||k\  xr ||k\  xr ||k  xr |	|k  S )Nr4   )
bbox1bbox2r	   r
   r   r   r   r   r   r   s
             r   containsz inner_contours.<locals>.containsA  sA    BBBB2v6"b&6RV6B6r   c                 *    D ]  } ||       s y y)NTFr4   )r   level_1_bboxr   res_level_1s     r   contained_in_concerned_level_1z6inner_contours.<locals>.contained_in_concerned_level_1T  s!    ' 	9Ld+D	9r   )ro   zerosrn   uint8cvfindContours	RETR_TREECHAIN_APPROX_SIMPLEr   isinboundingRectr   )ra   r   rb   rc   r	   r
   r   r   rg   contours	hierarchylevel_0level_1level_2level_1_bbox_listri   r    xyr   r   r   r   r   level_2_bboxr   r   s                            @@r   inner_contoursr   !  s    NBB
((:##288
4C"2b5"R%<0C2r"u//#r||R=S=STHi hhy1Q'+,Q/Ghhrwwy1Q/9:1=Ghhrwwy1Q/9:1=G
7
 +-b"'{C 3__Xa[1
1aU7agx  !Q!QqS!123
 # "& 	"Ee|Xu%""5)

5!		""
  !__Xa[1
1a1ac1Q3'U7agx),7

< ! Jr   
img_sourcegapdwdhc           	         |j                   \  }}|xs t        t        |dz        d      }|xs t        t        |dz        d      }dt        j                  ||z   |z   ||z   |z   dft        j
                        z  }| |||z   ||z   |z   d|ddf<   t        j                  |dk(  d      }t        |      D ]:  \  }	}
t        |
|z  |z        }d||	|z   |z   ||z   ||z   t        |      z   ddf<   < t        j                  |dk(  d      }t        |      D ]%  \  }	}
t        |
|z  |z        }d|||z
  ||	ddf<   ' |S )	u  Projection profile along x and y direction.

    ```
           ┌────────────────┐
        dh │                │
           └────────────────┘
                 gap
           ┌────────────────┐ ┌───┐
           │                │ │   │
         h │     image      │ │   │
           │                │ │   │
           └────────────────┘ └───┘
                    w           dw
    ```

    Args:
        img_source (np.array): Source image, e.g. RGB mode.
        img_binary (np.array): Binarized image.
        gap (int, optional): Gap between sub-graph. Defaults to 5.
        dw (int, optional): Graph height of x projection profile. Defaults to None.
        dh (int, optional): Graph height of y projection profile. Defaults to None.

    Returns:
        np.array: The combined graph data.
    rV      rk   r   r   Nr1   rl   )rn   r   r@   ro   onesr   rp   	enumerate)r   ra   r   r   r   r   r   rg   valsr    valcrs                r   xy_project_profiler   c  si   6 DAq		 s3qs8R B		 s3qs8R B
bggqtCx2c1-RXX>
>C $.C3r#vax1a  JO!4DD/ 13AbM/0AbDHaeAcE#a&L(!+,1
 JO!4DD/ 3AbMBqDGQM Jr   )        r         .@r   )   NN)collectionsr   numpyro   cv2r   r   r   r"   r   rA   r@   rD   rC   rX   r   r   r   rq   r   r   r4   r   r   <module>r      s/     E % $.&L.Bt .B .B4 .Bb(T (d ( (V3 # D " 25480 00+00 0.30f' 'U 'E 'T?bhh ?U ?% ?u ?D/"(( /rxx /S /s /\_ /r   