
    kh"                     j    d Z ddlmZmZmZmZ ddlmZ ddlmZm	Z	 ddl
mZ ddl
mZ  G d d	e	      Zy
)zA group of ``Shape`` instances.   )ShapeStrokeFill	Hyperlink   )RectType)
CollectionElementCollection)share)	constantsc                       e Zd ZdZdefdZdefdZed        Z	ed        Z
ed        Zed	        Zed
        Zed        ZdedefdZdefdZd Zed        Zd Zy)Shapesz= A collection of ``Shape`` instances: ``Stroke`` or ``Fill``.rawsc                     | j                          |D ]>  }d|v rt        |      }nd|v rt        |      }nt        |      }| j	                  |       @ | S )z;Clean current instances and restore them from source dicts.starturi)resetr   r   r   append)selfr   rawshapes       Q/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/shape/Shapes.pyrestorezShapes.restore   sV    

 	C#~s#!#S	KK	     ec                      y)z override. Do nothing.N )r   r   s     r   _update_bboxzShapes._update_bbox   s    r   c                 X    t        t        d | j                              }t        |      S )zJ Stroke Shapes, including table border, text underline and strike-through.c                 "    t        | t              S N)
isinstancer   r   s    r   <lambda>z Shapes.strokes.<locals>.<lambda>%   s    *UF3 r   listfilter
_instancesr   r   	instancess     r   strokeszShapes.strokes!   s+     3T__F G	i  r   c                 X    t        t        d | j                              }t        |      S )z3 Fill Shapes, including cell shading and highlight.c                 j    t        | t              xr" | j                  t        j                  d      k7  S )N)r   r   r   )r"   r   colorr   	rgb_valuer#   s    r   r$   z!Shapes.fillings.<locals>.<lambda>.   s*    *UD1 8uw77 r   r%   r)   s     r   fillingszShapes.fillings)   s0     89=J K	 i  r   c                 X    t        t        d | j                              }t        |      S )z Hyperlink Shapes.c                 "    t        | t              S r!   )r"   r   r#   s    r   r$   z#Shapes.hyperlinks.<locals>.<lambda>7   s    *UI6 r   r%   r)   s     r   
hyperlinkszShapes.hyperlinks3   s+     6I J	i  r   c                 X    t        t        d | j                              }t        |      S )zPotential table borders.c                 @    | j                  t        j                        S r!   )has_potential_typer   BORDERr#   s    r   r$   z&Shapes.table_strokes.<locals>.<lambda>?   s    %228??C r   r&   r'   r(   r
   r)   s     r   table_strokeszShapes.table_strokes;   s+     CT__V W	 ++r   c                 X    t        t        d | j                              }t        |      S )zPotential table shadings.c                 @    | j                  t        j                        S r!   )r6   r   SHADINGr#   s    r   r$   z'Shapes.table_fillings.<locals>.<lambda>G   s    %2283C3CD r   r8   r)   s     r   table_fillingszShapes.table_fillingsC   s+     DdooW X	 ++r   c                 \    d }t        t        || j                              }t        |      S )zcPotential text style based shapes,
        e.g. underline, strike-through, highlight and hyperlink.c                    | j                  t        j                        xsa | j                  t        j                        xs@ | j                  t        j                        xs | j                  t        j
                        S r!   )r6   r   	HIGHLIGHT	UNDERLINESTRIKE	HYPERLINKr#   s    r   fz#Shapes.text_style_shapes.<locals>.fO   sj    ++H,>,>? A,,X-?-?@A,,X__=A ,,X-?-?@Ar   )setr'   r(   r
   )r   rD   r*   s      r   text_style_shapeszShapes.text_style_shapesK   s*    	A
 q$//23	 ++r   max_border_widthshape_min_dimensionc                 b   | j                   sy| j                  j                  }g }| D ]  }t        |j                  j                  |j                  j
                        |k  r;|j                  j                  |      }|j                  rc|j                  |j                  |              | j                  |      }g }|D ]J  }	t        |	t              r'|	j                  |      }
|j                  |
r|
n|	       :|j                  |	       L | j                  |       | j                          y)a  Clean rectangles.

        * Delete shapes out of page.
        * Delete small shapes (either width or height).
        * Merge shapes with same filling color.
        * Detect semantic type.

        Args:
            max_border_width (float): The max border width.
            shape_min_dimension (float): Ignore shape if both width and height
                is lower than this value.
        N)r(   parentbboxmaxwidthheight	intersectis_emptyr   update_bbox_merge_shapesr"   r   	to_stroker   _parse_semantic_type)r   rG   rH   	page_bboxcleaned_shapessbbox_in_pagemerged_shapesshapesr   strokes              r   clean_upzShapes.clean_upX   s     KK$$	 	?A166<</0CCX66++I6L$$h!!!--"=>		? **>: " 	%E%&)9:fE:e$	% 	

6 	!!#r   tablesc                 p   |sy|D cg c]  }g  }}g }| j                   D ]  }|j                  t        j                        s|j                  t        j                        r|j                  |       St        ||      D ]c  \  }}|j                  j                  |j                        r|j                  |        |j                  j                  |j                        rde |j                  |        t        ||      D ]  \  }}|s	|j                  |        | j                  |       yc c}w )zAdd Shape to associated cells of given tables.

        Args:
            tables (list): A list of TableBlock instances.
        N)r(   equal_to_typer   r7   r<   r   ziprK   contains
intersectsassign_shapesr   )r   r]   _shapes_in_tablesrZ   r   tableshapes_in_tables           r   assign_to_tableszShapes.assign_to_tables   s    v )//1B//__ 	%E""8??3u7J7J8K[K[7\e$*-f6F*G %&::&&uzz2#**51 ..uzz:% e$%	%* '*&2B&C 	1"E?"H0	1
 	

69 0s   	D3c                     d}| j                   D ]  }|j                  ||        d}| j                  D ]  }|j                  ||        d}| j                  D ]  }|j                  ||        y)aZ  Plot shapes for debug purpose.
        Different colors are used to display the shapes in detected semantic types, e.g.
        yellow for text based shape (stroke, underline and highlight). Due to overlaps
        between Stroke and Fill related groups, some shapes are plot twice.

        Args:
            page (fitz.Page): pdf page.
        )?g?rj   )    rk   rk   )r   r   rk   N)r=   plotr9   rF   )r   pager.   r   s       r   rl   zShapes.plot   sq     ,((AE%**T5*AA ''@ED%)@@ ++DEUZZe-DDr   c                    t        t        d |             }d }t        |      j                  |      }g }|D ]  }|j                  j                         }t        d |D              }||z  t        j                  k\  r.|j                  |d   j                  |j                               s|j                  |        t        d |       }|j                  |       |S )znMerge shapes if same filling color. Note the merged bbox must match source shapes
        as more as possible.c                     | j                    S r!   )is_determinedr#   s    r   r$   z&Shapes._merge_shapes.<locals>.<lambda>   s    e111 r   c                     | j                   |j                   k(  xr8 | j                  j                  |j                  t        j
                              S r!   )r.   rK   rb   get_expand_bboxr   	TINY_DIST)abs     r   rD   zShapes._merge_shapes.<locals>.f   s:    77AGG#a(9(9!:K:KIL_L_:`(aar   c              3   P   K   | ]  }|j                   j                            y wr!   )rK   get_area).0r   s     r   	<genexpr>z'Shapes._merge_shapes.<locals>.<genexpr>   s     DU5::..0Ds   $&rk   c                 @    | j                  t        j                        S r!   )r_   r   rC   r#   s    r   r$   z&Shapes._merge_shapes.<locals>.<lambda>   s    %*=*=h>P>P*Q r   )r&   r'   r	   grouprK   rw   sumr   FACTOR_ALMOSTr   rQ   extend)	rZ   normal_shapesrD   groupsrY   r{   merged_areasum_arear3   s	            r   rR   zShapes._merge_shapes   s    
 V16; <	bM*003 	,E**--/KDeDDH#y'>'>>$$U1X%9%9%**%EF$$U+	, QSYZ
Z(r   c                     | j                   j                  }|j                          | j                  D ]  }|j	                  |        y)a   Detect shape type based on the position to text blocks.

        .. note::
            Stroke shapes are grouped on connectivity to each other, but in some cases,
            the gap between borders and underlines/strikes are very close, which leads
            to an incorrect table structure. So, it's required to distinguish them in
            advance, though we needn't to ensure 100% accuracy. They are finally determined
            when parsing table structure and text format.
        N)_parentblockssort_in_reading_orderr(   parse_semantic_type)r   r   r   s      r   rT   zShapes._parse_semantic_type   sB     $$$$& __ 	.E%%f-	.r   N)__name__
__module____qualname____doc__r&   r   r   r   propertyr+   r0   r3   r9   r=   rF   floatr\   rh   rl   staticmethodrR   rT   r   r   r   r   r   
   s    G4  %U % ! ! ! ! ! ! , , , , 	, 	,'$ '$5 '$T%d %PE6  6.r   r   N)r   r   r   r   r   common.sharer   common.Collectionr	   r
   commonr   r   r   r   r   r   <module>r      s)    % 1 1 # =  g. g.r   