
    kh_\                         d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ d	d
lmZ d	dlmZ d	dlmZ  G d d      Z G d d      Zy)z4Parsing table structure based on strokes and fills.
    N   )Element)RectType)	constants)ShapeStroke)Shapes   )
TableBlock)Row)Cellc                   j    e Zd ZdZdefdZed        Zed        Zde	de	fdZ
d	efd
ZddedefdZy)CellStructurez;Cell structure with properties bbox, borders, shading, etc.bboxc                     t        j                  |      | _        t        j                  |      | _        d | _        d | _        d| _        y )Nr
   r
   )fitzRectr   merged_bboxbordersshadingmerged_cells)selfr   s     Y/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/table/TableStructure.py__init__zCellStructure.__init__   s=    IIdO	99T?   "    c                 N    | j                   d   dk(  xs | j                   d   dk(  S Nr   r
   r   r   s    r   	is_mergedzCellStructure.is_merged%   s.     $ 1 1! 4a 7 R4;L;LQ;OQR;RRr   c                 N    | j                   d   dkD  xs | j                   d   dkD  S r   r   r    s    r   
is_mergingzCellStructure.is_merging(   s.    !%!2!21!5a!7!Q4;L;LQ;OPQ;QQr   	h_strokes	v_strokesc                     | j                   \  }}}}| j                  ||   d      }| j                  ||   d      }| j                  ||   d      }	| j                  ||   d      }
|||	|
f| _        y)aS  Parse cell borders from strokes.
        
        Args:
            h_strokes (dict): A dict of y-coordinate v.s. horizontal strokes, e.g. 
                ``{y0: [h1,h2,..], y1: [h3,h4,...]}``
            v_strokes (dict): A dict of x-coordinates v.s. vertical strokes, e.g. 
                ``{x0: [v1,v2,..], x1: [v3,v4,...]}``
        rowcolN)r   _get_border_stroker   )r   r$   r%   x0y0x1y1topbottomleftrights              r   parse_borderszCellStructure.parse_borders,   s     ))BB%%imU;((2>&&y}e<''	"u=VT51r   fillsc                    | j                   \  }}}}|j                  }|j                  }|j                  }|j                  }	| j                  \  }
}}}|
|	dz  z   ||dz  z   ||dz  z
  ||dz  z
  f}t               j	                  |      }|D ],  }|j                  |t        j                        s%|| _         y d| _        y)zParse cell shading from fills.
        
        Args:
            fills (Shapes): Fill shapes representing cell shading.
               @)	thresholdN)	r   widthr   r   update_bboxcontainsr   FACTOR_MOSTr   )r   r3   r.   r/   r0   r1   w_topw_rightw_bottomw_leftr*   r+   r,   r-   
inner_bboxtargetshapes                    r   parse_shadingzCellStructure.parse_shading=   s     $(<< VT5		++<< ))BBmRc	\2gck>2hsl?S
&&z2  	 E~~f	0E0E~F$	 
  DLr   strokes	directionc                    |s
t               S |dk(  rdnd}| j                  |   | j                  |dz      }}d}g }|D ]  }|j                  |j                  |j                  |j
                  f}	|	|   |	|dz      }}
||k  rD|
|k\  r nEt        ||      t        ||
      z
  }|t        j                  k  rx||z  }|j                  |        |||z
  z  t        j                  k  r
t               S t        |      dk(  r|d   S t        |D cg c]  }|j                   c}      }t        |      dk(  r|d   S t               S c c}w )z Find strokes representing cell borders.
        
        Args:
            strokes (Shapes): Candidate stroke shapes for cell border.
            direction (str): Either ``row`` or ``col``.
        r'   r   r
   r   g        )r   r   r*   r+   r,   r-   minmaxr   
MAJOR_DISTappendFACTOR_MAJORlensetcolor)r   rC   rD   idxr*   r,   Lborder_strokesstroker   t0t1dl
propertiess                 r   r)   z CellStructure._get_border_strokeX   sR    vx e#a !!#&(8(8Q(?B  
	*FIIvyy&))VYY?D#YSUBRxRxRs2r{*BI((((GA!!&)
	* be9y---fh ~!.*;#;^D6&,,DE
$'
OQ$6~a DFHD Es   ENr'   )__name__
__module____qualname____doc__listr   propertyr!   r#   dictr2   r	   rB   strr)    r   r   r   r      sd    E"D "$ R RQ Q2d 2d 2" &  6'E 'E3 'Er   r   c            	       .   e Zd ZdZdefdZed        Zed        Zed        Z	ed        Z
ed        Zd	efd
Zd Zd Zedededefd       Zd Zd Zededededefd       Zed#dededefd       Zedefd       ZdedefdZdededed efd!Zy")$TableStructureaQ  Parsing table structure based on strokes/fills.
    
    Steps to parse table structure::

            x0        x1       x2        x3
        y0  +----h1---+---h2---+----h3---+
            |         |        |         |
            v1        v2       v3        v4
            |         |        |         |
        y1  +----h4------------+----h5---+
            |                  |         |
            v5                 v6        v7
            |                  |         |
        y2  +--------h6--------+----h7---+
        

    1. Group horizontal and vertical strokes::
        
        self.h_strokes = {
            y0 : [h1, h2, h3],
            y1 : [h4, h5],
            y2 : [h6, h7]
        }
    
    These ``[x0, x1, x2, x3] x [y0, y1, y2]`` forms table lattices, i.e. 2 rows x 3 cols.

    2. Check merged cells in row/column direction.

    Let horizontal line ``y=(y0+y1)/2`` cross through table, it gets intersection with 
    ``v1``, ``v2`` and ``v3``, indicating no merging exists for cells in the first row.

    When ``y=(y1+y2)/2``, it has no intersection with vertical strokes at ``x=x1``, i.e. 
    merging status is ``[1, 0, 1]``, indicating ``Cell(2,2)`` is merged into ``Cell(2,1)``.

    So, the final merging status in this case::

        [
            [(1,1), (1,1), (1,1)],
            [(1,2), (0,0), (1,1)]
        ]
    rC   c                     g | _         t        j                  ||d   |d         \  | _        | _        | j                  r| j                  sy| j                         | _         y)a  Parse table structure from strokes and fills shapes.
        
        Args:
            strokes (Shapes): Stroke shapes representing table border. 
                For lattice table, they're retrieved from PDF raw contents; 
                for stream table, they're determined from layout of text blocks.

        .. note::
            Strokes must be sorted in reading order in advance, required by checking merged cells.        
        min_border_clearancemax_border_widthN)cellsra   _group_h_v_strokesr$   r%   _init_cells)r   rC   settingss      r   r   zTableStructure.__init__   s]     
 *8)J)J7 !78 !34*6& ~~T^^V %%'
r   c                    | j                   st        j                         S | j                   d   d   j                  j                  \  }}| j                   d   d   j                  j
                  \  }}t        j                  ||||      S )zUTable boundary bbox.

        Returns:
            fitz.Rect: bbox of table.
        r   )re   r   r   r   tlbr)r   r*   r+   r,   r-   s        r   r   zTableStructure.bbox   so     zz$))+-Aq!&&))BB#((++ByyBr"%%r   c                 ,    t        | j                        S )N)rK   re   r    s    r   num_rowszTableStructure.num_rows   s    "4::.r   c                 N    | j                   rt        | j                   d         S dS )Nr   )re   rK   r    s    r   num_colszTableStructure.num_cols   s     59ZZs4::a=1FQFr   c                     | j                   sg S | j                   D cg c]  }|d   j                  j                   }}|j                  | j                   d   d   j                  j                         |S c c}w )zlTop y-coordinate ``y0`` of each row.

        Returns:
            list: y-coordinates of each row.
        r   rj   )re   r   r+   rI   r-   )r   r'   Ys      r   y_rowszTableStructure.y_rows   sa     zz"9'+zz2SV[[^^22	B"''**+ 3s    A6c                     | j                   sg S | j                   d   D cg c]  }|j                  j                   }}|j                  | j                   d   d   j                  j                         |S c c}w )zsLeft x-coordinate ``x0`` of each column.

        Returns:
            list: x-coordinates of each column.
        r   rj   )re   r   r*   rI   r,   )r   cellXs      r   x_colszTableStructure.x_cols   sa     zz"9&*jjm4dTYY\\44	Ar"''**+ 5s   A6r3   c                     | j                   s| S | j                          | j                   D ]M  }|D ]F  }|j                  r|j                  | j                  | j
                         |j                  |       H O | S )zyParse table structure.
        
        Args:
            fills (Shapes): Fill shapes representing cell shading.
        )re   _check_merging_statusr!   r2   r$   r%   rB   )r   r3   r'   ru   s       r   parsezTableStructure.parse   sw     zz$; 	""$ :: 	*C *>>8""4>>4>>B""5)*	* r   c           
         t               }| j                  D ]^  }t               }|d   j                  j                  |d   j                  j
                  z
  |_        |D ]  }|j                  r|j                  t                      *|j                  \  }}}}|j                  }	|j                  }
|j                  }|j                  }|j                  r|j                  j                  nd}t        ||j                  |j                  |j                  |j                  f|	|
||f|j                  d      j                  |j                         }|j                  |        |j                  |       a |r| j#                          |S )zConvert parsed table structure to ``TableBlock`` instance.

        Returns:
            TableBlock: Parsed table block instance.
        r   N)bg_colorborder_colorborder_widthr   )r   re   r   r   r-   r+   heightr!   rI   r   r   r7   r   rM   r   r8   r   _finalize_strokes_fills)r   tablerow_structuresr'   cell_structurer.   r/   r0   r1   r;   r<   r=   r>   r|   ru   s                  r   to_table_blockzTableStructure.to_table_block  sd    "jj "	N%C'*//22>!3D3I3I3L3LLCJ"0 ! "++JJtv& ,:+A+A(VT5		++!<< <J;Q;Q>1177W[ !)%(YYV\\4::$V%*GXv$F$2$?$?	 
 ;~99:  

4 7!< LLE"	J $..0r   c                    | j                   j                         D ]!  \  }}|D ]  }t        j                  |_         # | j
                  j                         D ]!  \  }}|D ]  }t        j                  |_         # | j                  D ]5  }|D ].  }|j                  st        j                  |j                  _        0 7 y)zZFinalize table structure, so set strokes and fills type as BORDER and SHADING accordingly.N)	r$   itemsr   BORDERtyper%   re   r   SHADING)r   krC   rQ   r'   ru   s         r   r   z&TableStructure._finalize_strokes_fills6  s     ....0 	AJAw!@6;@	A ....0 	AJAw!@6;@	A :: 	FC F<<X5E5E!2F	Fr   rc   rd   c                 *   dt         dt        ffd}i }i }t        d      t        d      t        d       t        d       f\  }}}}	| D ]q  }
 ||
|
j                  r|n|       t	        ||
j
                        }t        ||
j                        }t	        ||
j                        }t        |	|
j                        }	s |r|syt               j                  ||||	f      }t        j                  ||d|       t        j                  ||d|       t        j                  ||d|       t        j                  ||d	|       |j                         D ]  \  }}|j                           |j                         D ]  \  }}|j!                           ||fS )
a  Split strokes in horizontal and vertical groups respectively.

        According to strokes below, the grouped h-strokes looks like::

            h_strokes = {
                y0 : [h1, h2, h3],
                y1 : [h4, h5],
                y2 : [h6, h7]
            }

               x0        x1        x2        x3
            y0  +----h1---+---h2---+----h3---+
                |         |        |         |
                v1        v2       v3        v4
                |         |        |         |
            y1  +----h4------------+----h5---+
                |                  |         |
                v5                 v6        v7
                |                  |         |
            y2  +--------h6--------+----h7---+

        rQ   rC   c                 (   | j                   rt        | j                  d      nt        | j                  d      }|D ]E  }t	        ||z
        kD  r||z   dz  }|j                  |      ||<   ||   j                  |         y  t        | g      ||<   y )Nr
   r5   )
horizontalroundr+   r*   abspoprI   r	   )rQ   rC   tt_rc   s       r   group_strokesz8TableStructure._group_h_v_strokes.<locals>.group_strokes]  s    '-'8'8fii#eFIIq>QA  .qt9118T3J$[[_

!!&). $VH-
r   inf)NNr.   r/   r0   r1   )r   r]   floatr   rF   r*   rG   r,   r+   r-   r   r8   ra   _check_outer_strokesr   sort_in_line_ordersort_in_reading_order)rC   rc   rd   r   r$   r%   X0Y0X1Y1rQ   
table_bbox_r   s    `            r   rf   z!TableStructure._group_h_v_strokesE  sz   0	. 	. 	. 		uuU|eEl]U5\MQBB 	$F&v/@/@)iP R#BR#BR#BR#B	$ 	* Y**BB+;<
++J	5JZ[++J	8M]^++J	6K[\++J	7L\] $//+IJAwW-G-G-II#//+LJAwW-J-J-LL)##r   c                 j   t        | j                        }t        | j                        }g }t        t	        |      dz
        D ]l  }||   ||dz      }}|j                  g        t        t	        |      dz
        D ]2  }||   ||dz      }	}t        |||	|g      }
|d   j                  |
       4 n |S )zInitialize table lattices.r
   rj   )sortedr$   r%   rangerK   rI   r   )r   rs   rw   re   ir+   r-   jr*   r,   ru   s              r   rg   zTableStructure._init_cells  s     '' s6{1}% 	'AAYqsBLL3v;q=) 'F1Q3KB$b"b"%56b	  &'	' r   c                    | j                   | j                  }}g }|D cg c]  }| j                  |    }}| j                  D ]`  }|d   j                  j
                  |d   j                  j                  z   dz  }t        j                  ||d      }|j                  |       b g }	|D cg c]  }| j                  |    }}| j                  d   D ]Z  }
|
j                  j                  |
j                  j                  z   dz  }t        j                  ||d      }|	j                  |       \ t        | j                        D ]m  }t        | j                        D ]S  }| j                  |   |   }
t        j!                  ||   |d       }t        j!                  |	|   |d       }||f|
_        U o t        | j                        D ]  }t        | j                        D ]g  }| j%                  ||       | j                  |   |   }
|
j"                  \  }}||   ||   |||z      |||z      f}t'        j(                  |      |
_        i  yc c}w c c}w )zCheck cell merging status.r   r5   r'   columnN)rw   rs   r%   re   r   r+   r-   ra   _check_merged_cellsrI   r$   r*   r,   r   rn   rp   _count_merged_cellsr   _validate_merging_regionr   r   r   )r   rw   rs   merged_cells_rowsr   ordered_strokesr'   ref_yrow_structuremerged_cells_colsru   ref_xcol_structurer   r   n_coln_rowr   s                     r   ry   z$TableStructure._check_merging_status  sB   dkk6<=4>>!,==:: 	4CV[[^^CFKKNN2C7E*>>uoW\]M$$]3	4 6<=4>>!,==JJqM 	4DYY\\$)),,.3E*>>uoW_`M$$]3	4 t}}% 	3A4==) 3zz!}Q'&::;LQ;OPQPR;ST&::;LQ;OPQPR;ST%*EN!	3	3 t}}% 	3A4==) 
3--a3
 zz!}Q'#00uq	6!9fQuWovagO#'99T? 
3	3/ > >s   I,Ir   r   rD   c                    t        | j                        }|dk(  r!d}t        |      }||   j                          ns|dk(  r!d}t	        |      }||   j                          nM|dk(  r!d}t        |      }||   j                          n'|dk(  r!d}t	        |      }||   j                          ny	||   }t               }|dz   d
z  }	|d|	|kD  rdndz  z   ||	<   t        ||z
        |kD  r-t        |j                         j                  |      g      ||<   y	|dz   dz  }
|
dz   }||   D cg c]   }|j                  |
   |j                  |   f" }}|j                  ||   d	f       ||
   }g }|D ]c  \  }}|}t        ||z
        t        j                  kD  r;|||
<   |||
dz   <   |j                  |j                         j                  |             |}e ||   j                  |       y	c c}w )aW  Add missing outer borders based on table bbox and grouped horizontal/vertical borders.
        
        Args:
            * table_bbox (Element): Table region.
            * borders (dict): Grouped horizontal (or vertical) borders at y-coordinates.
            * direction (str): Either ``top`` or ``bottom`` or ``left`` or ``right``.
        r.   r
   r/      r0   r   r1   r   N   g?rj   )r[   r   rF   r   rG   r   r   r   r	   copyr8   rI   r   
MINOR_DISTextend)r   r   rD   rd   r   rN   currentr@   sample_borderidx1	idx_startidx_endborderoccupiedstartsegmentsr0   r1   ends                      r   r   z#TableStructure._check_outer_strokes  s(    JOO$eC'lGG//1 C'lGG//1C'lGG224C'lGG224c Aqyc$s(Q;;T
 vg~ 00$m&8&8&:&F&Ft&L%MNGFO Q	IkG =DG<LN28  Y/G,. NH NOOT']D12OEH!) 	uuSy>)"6"66&+DO(+D1%OOM$6$6$8$D$DT$JK 	 G##H-!Ns   *%G+refc                 8   g }|dd D ]  }|D ]w  }|dk(  r|j                   |j                  }}n|j                  |j                  }}|| cxk  r|k  rn n|j	                  d        _| |kD  r`| |k  sf|j	                  d        ~ |j	                  d        |S )a  Check merged cells in a row/column. 
        
        Args:
            * ref (float): y (or x) coordinate of horizontal (or vertical) passing-through line.
            * borders (list[Shapes]): A list of vertical (or horizontal) rects list in a column (or row).
            * direction (str): ``row`` - check merged cells in row; ``column`` - check merged cells in a column.

        Taking cells in a row for example, give a horizontal line ``y=ref`` passing through this row, 
        check the intersection with vertical borders. The ``n-th`` cell is merged if no intersection 
        with the ``n-th`` border.
            
                +-----+-----+-----+
                |     |     |     |
                |     |     |     |
                +-----+-----------+
                |           |     |
            ----1-----0-----1----------> [1,0,1]
                |           |     |
                |           |     |
                +-----------+-----+
        r   rj   r'   r
   )r+   r-   r*   r,   rI   )r   r   rD   resshapesr   ref0ref1s           r   r   z"TableStructure._check_merged_cells  s    . am 	F ! e#!'FII$D!'FII$D#$$JJqM
 4Z 4ZJJqM+2 

19	< 
r   merging_statusc                 J    | d   dk(  ryd}| dd D ]  }|dk(  r|dz  } |S  |S )zCount merged cells, 
        e.g. ``[1,0,0,1]`` -> the second and third cells are merged into the first one.
        
        Args:
            merging_status (list): A list of 0-1 representing cell merging status.
        r   r
   Nr_   )r   numvals      r   r   z"TableStructure._count_merged_cells>  sO     !a !!"% 	CAvq
	
 
r   r   r   c                 V   | j                   |   |   }|j                  ry|j                  \  }}|dk(  r|dk(  ry| j                  |||z   |||z         sVt	        |||z         D ]<  }t	        |||z         D ](  }| j                   |   |   }|j                  s"d|_        * > d|_        yy)zCheck whether the merging region of Cell (i,j) is valid. If not, unset merging status. 

        Args:
            i (int): Row index of the target cell.
            j (int): Column index of the target cell.
        Nr
   r   )re   r!   r   _is_valid_regionr   )	r   r   r   ru   r   r   mnr@   s	            r   r   z'TableStructure._validate_merging_regionS  s     zz!}Q>>6 ((u!8q& $$Q%AeG<1ag& Fq!E'* FA!ZZ]1-F''v)<FF
 !'D =r   	row_startrow_end	col_startcol_endc                     t        ||      D ]=  }t        ||      D ],  }||k(  r||k(  r| j                  |   |   j                  r+  y ? y)a  Check whether all cells in given region are marked to merge.

        Args:
            row_start (int): Start row index (included) of the target region.
            row_end (int): End row index (excluded) of the target region.
            col_start (int): Start column index (included) of the target region.
            col_end (int): Start column index (excluded) of the target region.
        FT)r   re   r!   )r   r   r   r   r   r   r   s          r   r   zTableStructure._is_valid_regionk  s]     y'* 	!A9g. !i<AyL(zz!}Q'11 !	!
 r   NrV   )rW   rX   rY   rZ   r	   r   r\   r   rn   rp   rs   rw   rz   r   r   staticmethodr   rf   rg   ry   r   r]   r^   r   r[   r   r   intr   r   r_   r   r   ra   ra      sk   (T(v (0 	& 	& . .F F	 	 	 	& *.bF @$6 @$ @$X] @$ @$F&'3T <. <. <. <._d <. <.| 5 5t 5s 5 5p 4  (' ' '0 c S RU r   ra   )rZ   r   common.Elementr   common.sharer   commonr   shape.Shaper   r   shape.Shapesr	   r   r   r   r   ra   r_   r   r   <module>r      sB     $ #  ' ! "  nE nEbw wr   