
    kh<                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZ  G d d      Zy)a=  Parsing table blocks.

* ``lattice table``: explicit borders represented by strokes.
* ``stream table`` : borderless table recognized from layout of text blocks.

Terms definition:

* From appearance aspect, we say ``stroke`` and ``fill``, the former looks like a line, 
  while the later an area.
* From semantic aspect, we say ``border`` (cell border) and ``shading`` (cell shading).
* An explicit border is determined by a certain stroke, while a stroke may also represent 
  an underline of text.
* An explicit shading is determined by a fill, while a fill may also represent a highlight 
  of text.
* Border object is introduced to determin borders of stream table. Border instance is a 
  virtual border adaptive in a certain range, then converted to a stroke once finalized, 
  and finally applied to detect table border.
   )	constants)Element)
Collection)Blocks)Shapes)Lines   )TableStructure)BorderBorders)Cellc            	           e Zd ZdZd ZdededefdZdededefdZed	e	fd
       Z
ed	e	dededefd       Zed        Zed	e	defd       Zy)TablesConstructorz7Object parsing ``TableBlock`` for specified ``Layout``.c                 V    || _         |j                  | _        |j                  | _        y N)_parentblocks_blocksshapes_shapes)selfparents     \/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/table/TablesConstructor.py__init__zTablesConstructor.__init__$   s    }}}}    connected_border_tolerancemin_border_clearancemax_border_widthc                 $   | j                   sydt        fd}| j                   j                  j                  ||      } ||      }| j                   j                  }t               }||d}|D ]j  }	|j                  |	j                        }
t        |	fi |j                  |
      j                         }|sJ|j                          |j                  |       l | j                  j                  |       | j                   j                  |       y)ac  Parse table with explicit borders/shadings represented by rectangle shapes.

        Args:
            connected_border_tolerance (float): Two borders are intersected if the gap lower than this value.
            min_border_clearance (float): The minimum allowable clearance of two borders.
            max_border_width (float): Max border width.
        N	instancesc                     d }t        |       j                  |      }g }|D ]:  }t        |      dk(  r|d   }nt        |d       }|d   }|j	                  |       < |S )z4Delete group when it's contained in a certain group.c                     | j                   j                  |j                         xs% |j                   j                  | j                         S r   )bboxcontains)abs     r   <lambda>zJTablesConstructor.lattice_tables.<locals>.remove_overlap.<locals>.<lambda>:   s-    qvvqvv6Q!&&//!&&:Q r   r	       c                 6    | j                   j                         S r   )r#   get_area)instances    r   r'   zJTablesConstructor.lattice_tables.<locals>.remove_overlap.<locals>.<lambda>D   s    X]]-C-C-E r   )key)r   grouplensortedappend)r    fungroupsunique_groupsgroup_instancesr+   sorted_groups          r   remove_overlapz8TablesConstructor.lattice_tables.<locals>.remove_overlap7   s|     RC	*005FM#) 
/'*.q1H $*/E$GL+B/H$$X.
/ ! r   )dxdyr   r   )r   listtable_strokesgroup_by_connectivitytable_fillingsr   contained_in_bboxr#   r
   parseto_table_blockset_lattice_table_blockr1   r   assign_to_tables)r   r   r   r   r7   grouped_strokesfillstablessettingsstrokesgroup_fillstables               r   lattice_tablesz TablesConstructor.lattice_tables*   s     ||V	!T 	!* ,,44""&@E_"` 	 )9 ++ $8 0
 ' 	%G11',,?K #77h7==kJYY[E--/e$	% 	%%f-%%f-r   line_separate_thresholdc                      j                   j                  } j                   j                  } j                  j	                  ||      } j
                  j                  \  }} fd}	t               }
||d}|D ]!  }|st        |D cg c]  }|j                  j                   c}      }t        |D cg c]  }|j                  j                   c}      }t        |D cg c]  }|j                  j                   c}      }t        |D cg c]  }|j                  j                   c}      } |	||      \  }}||||f}||||f}t        j                  ||      }t!               j#                  |      }|j%                  |j                        }|j'                  |j                  t(        j*                        \  }}|s|st        j-                  |      rf j/                  ||||      }|s~|j1                          t3        |fi |j5                  |      j7                         }t9         j
                  t:              r0|j<                  |j>                  z  dk(  r|d   d   j@                  |jC                          |
jE                  |       $  j                  jG                  |
        j                   jG                  |
       yc c}w c c}w c c}w c c}w )zParse table with layout of text/image blocks, and update borders with explicit borders 
        represented by rectangle shapes.

        Refer to ``lattice_tables`` for arguments description.
        c                     }}j                   D ]e  }|j                  j                  | k  r|j                  j                  }|j                  j                  |kD  sL|j                  j                  } ||fS  ||fS )a  find the vertical boundaries of table in y-range [y0, y1]:
                - the bottom of block closest to y0
                - the top of block closest to y1

                ```
                +-------------------------+  <- Y0

                +--------------+
                +--------------+  <- y_lower

                +------------------------+  <- y0
                |         table          |
                +------------------------+  <- y1

                +-------------------------+ <- y_upper
                +-------------------------+

                +---------------------------+ <- Y1
                ```
            )r   r#   y1y0)rP   rO   y_lowery_upperblockY0Y1r   s        r   top_bottom_boundariesz>TablesConstructor.stream_tables.<locals>.top_bottom_boundaries}   sw    *  "2WG ::==2%w ::==2%#jjmmGG## G##r   r:   )	thresholdr	   r(   N)$r   r<   r>   r   collect_stream_linesr   r#   r   minx0rP   maxx1rO   r   _outer_bordersr   update_bboxr?   split_with_intersectionr   FACTOR_A_FEW_is_simple_structure_stream_strokessort_in_reading_orderr
   r@   rA   
isinstancer   num_colsnum_rowsbg_colorset_stream_table_blockr1   rC   )r   r   r   rL   r<   r>   tables_linesX0X1rV   rF   rG   table_linesrectrZ   rP   r\   rO   	y0_margin	y1_margin
inner_bbox
outer_bboxouter_bordersexplicit_strokesexplicit_shadings_rH   rJ   rT   rU   s   `                           @@r   stream_tableszTablesConstructor.stream_tablesj   s~    2244 ||88I`a **BB	$B $8 0

 ( '	!K{;tdiill;<B{;tdiill;<B{;tdiill;<B{;tdiill;<B $9R#@ Iyb"b)JiY7J-<<ZTM 9((4D - ? ?		 J#1#I#I$))_h_u_u#I#v q &)9!66{CX **;GWYjkGH ))+"77h7==>OP__aE $,,-u~~-q0U1Xa[5I5I5Q((*MM% O'	!T 	%%f-%%f-Q <;;;s   K
<K 
)K%
K*
linesc                     t        | j                               }|dk(  ry|dk(  r3t        | j                               t        | j                               k(  S y)zWhether current lines represent a simple table:        
        * only one column -> always flow layout in docx; or
        * two columns: lines are aligned in each row -> simple paragraph in docx
        r	   Tr   F)r/   group_by_columnsgroup_by_physical_rowsgroup_by_rows)rw   nums     r   ra   z&TablesConstructor._is_simple_structure   sR     %((*+6!Vu3356E<O<O<Q8RRRr   rr   rs   rt   c                    t               }|j                  |       t        j                  | |      }|j                  |       |j	                  ||       t               }|D ]!  }|j                  |j                                # |S )av  Parsing borders mainly based on content lines contained in cells, 
        and update borders (position and style) with explicit borders represented 
        by rectangle shapes.
        
        Args:
            lines (Lines): lines contained in table cells.
            outer_borders (tuple): Boundary borders of table, ``(top, bottom, left, right)``.
            explicit_strokes (Shapes): Showing borders in a stream table; can be empty.
            explicit_shadings (Shapes): Showing shadings in a stream table; can be empty.
        
        Returns:
            Shapes: Parsed strokes representing table borders.
        )r   extendr   _inner_bordersfinalizer   r1   	to_stroke)rw   rr   rs   rt   bordersinner_bordersresborders           r   rb   z!TablesConstructor._stream_strokes   s     ) 	}% *88N}% 	)+<= h 	+FJJv'')*	+ 
r   c                 B   | \  }}}}|\  }}}}	t        d||fd      }
t        d||	fd      }t        d||fd      }t        d||fd      }|
j                  ||f       |j                  ||f       |j                  |
|f       |j                  |
|f       |
|||fS )ai  Initialize outer Border instances according to lower and upper bbox-es.

        ::
            +--------------------------------->
            |
            | Y0 +------------------------+     + outer bbox
            |    |                        |     |
            |    | y0+----------------+   |     |
            |    |   |                |   +<----+
            |    |   |                +<--------+ inner bbox
            |    | y1+----------------+   |
            |    |   x0               x1  |
            | Y1 +------------------------+
            |    X0                       X1
            v
        HTF)border_range	referenceHBVLVR)r   set_boundary_borders)rp   rq   rZ   rP   r\   rO   rj   rT   rk   rU   topbottomleftrights                 r   r]   z TablesConstructor._outer_borders  s    $ $BB#BBB8uEB8uEB8uEB8uE 	  $/##T5M2!!3-0""C=1VT5))r   c           	      z   | j                         }|D cg c]"  }|j                  t        j                        $ }}t	        |      }|dk  }|dk\  r|D ]	  }d|_         t               }d}	|\  }
}}}t        |      D ]3  }|dk(  r|n|	}||dz
  k(  r|}	nY||   j                  j                  }||dz      j                  j                  }t        d||f|
|fd      }	|j                  |	       ||   }t	        |      }|dk(  rd}t        |      D ]  }|dk(  r|
n|}||dz
  k(  r|}nY||   j                  j                  }||dz      j                  j                  }t        d	||f||	f|      }|j                  |       t        j!                  ||   ||||	f      }|j#                  |        6 |S c c}w )
a  Calculate the surrounding borders of given ``lines``. These borders construct table cells. 

        Two purposes of stream table: 

        * Rebuild layout, e.g. text layout with two columns, and
        * parsing real borderless table.

        It's contradictory that the former needn't to deep into row level, just ``1xN`` table 
        convenient for layout recreation; instead, the later should, ``MxN`` table for each 
        cell precisely. So, the principle determining stream tables borders:

        * Vertical borders contributes the table structure, so ``border.is_reference=False``.
        * Horizontal borders are for reference when ``n_column=2``, in this case ``border.is_reference=True``.
        * During deeper recursion, h-borders become outer borders: it turns valuable when count 
          of detected columns >= 2.
        
        Args:
            lines (Lines): Lines in table cells.
            outer_borders (tuple): Boundary borders of table region.
        )factorr   FNr(   r	   VI)border_typer   r   r   HI)ry   r{   r   r`   r/   is_referencer   ranger#   r\   rZ   r   r1   rO   rP   r   r   r~   )rw   rr   
cols_lines	col_linesgroup_linescol_numr   r   r   r   TOPBOTTOMLEFTRIGHTir   rZ   r\   
rows_linesrow_numr   jr   rP   rO   borders_s                             r   r   z TablesConstructor._inner_borders&  s	   . ++-
_ijR[y..i6L6L.Mjj j/z A:' ,&+#, ) #0 VT5w -	)A a44UD '!)|UU]''**!_)),,4"$b &M#% u% %QJ*oG!|X F7^ ) Tcv gai<&#A++..B#AaC--00B
 $&("X!%u".0F NN6* -;;JqMCQWY]_dKefx(-)/-	)^ A ks   'F8N)__name__
__module____qualname____doc__r   floatrK   rv   staticmethodr   ra   tupler   rb   r]   r    r   r   r   r   !   s    A%=.+0=.%*=. "'=.@e.%*e.!&e. ).e.P 5   e 5 6 ek  D * *B WU W% W Wr   r   N)r   commonr   common.Elementr   common.Collectionr   layout.Blocksr   shape.Shapesr   
text.Linesr   r
   r   r   r   r   r   r   r   <module>r      s3   &  $ * " !  * # ] ]r   