
    kh                        d dl mZmZmZmZmZ d dlmZmZ d dl	Z
d dlmZ d dlmZ ddlmZmZ 	 	 ddee   d	eeegef   d
eee   gef   dedee   f
dZ	 ddee   dededee   fdZ	 ddee   dedeee   eee      f   fdZy)    )ListUnionAnyCallableIterable)partialreduceN)
csr_matrix)connected_components   )BaseLayoutElement	TextBlocksequencescoring_funcaggregation_funcdefault_score_valuereturnc           
         |d }t        |       }t        j                  ||f      |z  }t        |      D ]+  }t        |dz   |      D ]  } || |   | |         ||   |<    - t	        |      }t        |dd      \  }	}
g }t        |	      D ]E  }t        j                  |
|k(        d   }|j                   ||D cg c]  }| |   	 c}             G |S c c}w )a  Perform connected componenet analysis for any 1D sequence based on
    the scoring function and the aggregation function.
    It will generate the adjacency_matrix for the 1D sequence object using
    the provided `scoring_func` and find the connected componenets.
    The `aggregation_func` will be used to aggregate all elements within
    identified components (when not set, it will be the identity function).

    Args:
        sequence (List[Any]):
            The provided 1D sequence of objects.
        scoring_func (Callable[[Any, Any], int]):
            The scoring function used to construct the adjacency_matrix.
            It should take two objects in the sequence and produe a integer.
        aggregation_func (Callable[[List[Any]], Any], optional):
            The function used to aggregate the elements within an identified
            component.
            Defaults to the identify function: `lambda x: x`.
        default_score_value (int, optional):
            Used to set the default (background) score values that should be
            not considered when running connected component analysis.
            Defaults to 0.

    Returns:
        List[Any]: A list of length n - the number of the detected componenets.
    c                     | S N )xs    _/var/www/teggl/fontify/venv/lib/python3.12/site-packages/layoutparser/tools/shape_operations.py<lambda>z=generalized_connected_component_analysis_1d.<locals>.<lambda>:   s    Q        FT)csgraphdirectedreturn_labelsr   )lennponesranger
   r   whereappend)r   r   r   r   seq_lenadjacency_matrixijgraphn_componentslabelsgrouped_sequencecomp_idxelement_idxs                 r   +generalized_connected_component_analysis_1dr0      s   @ &(mGww125HH7^ Lq1ug& 	LA%1(1+x{%KQ"	LL '(E/TL& ,' Vhhv1215 0{1S!(1+1S TUV  2Ts   <Clayoutx_tolerancey_tolerancec                 H     d }t         t        |||       fd      }|S )aF  Perform line detection based on connected component analysis.

    The is_line_wise_close is the scoring function, which returns True
    if the y-difference is smaller than the y_tolerance AND the
    x-difference (the horizontal gap between two boxes) is also smaller
    than the x_tolerance, and False otherwise.

    All the detected components will then be passed into aggregation_func,
    which returns the overall union box of all the elements, or the line
    box.

    Args:
        layout (Iterable):
            A list (or Layout) of BaseLayoutElement
        x_tolerance (int, optional):
            The value used for specifying the maximum allowed y-difference
            when considered whether two tokens are from the same line.
            Defaults to 10.
        y_tolerance (int, optional):
            The value used for specifying the maximum allowed horizontal gap
            when considered whether two tokens are from the same line.
            Defaults to 10.

    Returns:
        List[BaseLayoutElement]: A list of BaseLayoutElement, denoting the line boxes.
    c                 V   | j                   j                  d   }|j                   j                  d   }| j                   j                  dd d   \  }}|j                   j                  dd d   \  }}	t        ||z
        |k  xr' t	        t        ||	z
        t        ||z
              |k  S )Nr   r   r   )blockcentercoordinatesabsmin)
token_atoken_br2   r3   y_ay_ba_lefta_rightb_leftb_rights
             r   is_line_wise_closez1simple_line_detection.<locals>.is_line_wise_closen   s    mm""1%mm""1%!--33ADqD9!--33ADqD9 c	Nk) QC()3w/?+@A[P	
r   )r3   r2   c                 J    t        d   j                  j                  |       S Nr   r	   	__class__unionseqr1   s    r   r   z'simple_line_detection.<locals>.<lambda>   s    VF1I,?,?,E,Es%K r   r   r   )r0   r   )r1   r2   r3   rC   detected_liness   `    r   simple_line_detectionrM   P   s5    <

  AK[
 LN r   union_groupc                 :     |r fd}nd}t         d |      }|S )a0  Group textblocks based on their category (block.type).

    Args:
        layout (Iterable):
            A list (or Layout) of BaseLayoutElement
        union_group (bool):
            Whether to union the boxes within each group.
            Defaults to True.

    Returns:
        List[TextBlock]: When `union_group=True`, it produces a list of
            TextBlocks, denoting the boundaries of each texblock group.
        List[List[TextBlock]]: When `union_group=False`, it preserves
            the elements within each group for further processing.
    c                 J    t        d   j                  j                  |       S rE   rF   rI   s    r   r   z4group_textblocks_based_on_category.<locals>.<lambda>   s    vfQi.A.A.G.G'M r   Nc                 4    | j                   |j                   k(  S r   )type)abs     r   r   z4group_textblocks_based_on_category.<locals>.<lambda>   s    !&&AFF"2 r   rK   )r0   )r1   rN   r   detected_group_boxess   `   r   "group_textblocks_based_on_categoryrV      s0    & MF2)  r   rE   )
   rW   )T)typingr   r   r   r   r   	functoolsr   r	   numpyr!   scipy.sparser
   scipy.sparse.csgraphr   elementsr   r   intr0   rM   boolrV   r   r   r   <module>r`      s    8 7 %  # 5 3 48 	43i4C:s?+4 S	{C/04 	4
 
#Y4p TV6&'6696MP6	
6t 6: Y .2 
4	?Di112 r   