
    kh8                     V    d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ  G d	 d
e      Z	y)al  
Objects representing PDF path (stroke and filling) extracted by ``page.get_drawings()``.

This method is new since ``PyMuPDF`` 1.18.0, with both pdf raw path and annotations like Line, 
Square and Highlight considered.

* https://pymupdf.readthedocs.io/en/latest/page.html#Page.get_drawings
* https://pymupdf.readthedocs.io/en/latest/faq.html#extracting-drawings
    N   )ImagesExtractor)lazyproperty)
Collection   )Pathc                   j    e Zd ZdZdefdZed        Zed        Z	d Z
d Z	 	 dded	ed
ededef
dZy)PathszA collection of paths.rawsc                     dd| j                   j                  | j                   j                  f}|D ]:  }t        |      }|j                  j                  |      s*| j                  |       < | S )z>Initialize paths from raw data get by ``page.get_drawings()``.r   )parentwidthheightr   bbox
intersectsappend)selfr   rectrawpaths        P/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/shape/Paths.pyrestorezPaths.restore   sc    1dkk''););< 	C9D99''-xKK		     c                 n    t        j                         }| j                  D ]  }||j                  z  } |S )N)fitzRect
_instancesr   )r   r   instances      r   r   z
Paths.bbox"   s.    yy{>H)>>r   c                 B    | j                   D ]  }|j                  r y y)z@It is iso-oriented when all contained segments are iso-oriented.FT)r   is_iso_oriented)r   r   s     r   r    zPaths.is_iso_oriented)   s%      	:H++E	:r   c                     | j                   sy|j                         }| j                   D ]  }|j                  |        |j                          y)zePlot paths for debug purpose.

        Args:
            page (fitz.Page): ``PyMuPDF`` page.
        N)r   	new_shapeplotcommit)r   pagecanvasr   s       r   r#   z
Paths.plot1   s<     !OO6DTYYv%66r   c                     g }| j                   D ].  }|j                  s|j                  |j                                0 |S )zConvert contained paths to ISO strokes or rectangular fills.

        Returns:
            list: A list of ``Shape`` raw dicts.
        )r   r    extend	to_shapes)r   shapesr   s      r   r)   zPaths.to_shapes>   sA     OO 	,D''MM$..*+	, r   min_svg_gap_dxmin_svg_gap_dymin_wmin_hclip_image_res_ratioc           	      P   g }| j                   r#|j                  | j                                |g fS g }t        | j                  j
                        }|j                  ||||      }	dt        dt        fd}
|	D cg c]  }t                }}| j                  D ]P  }t        |	|      D ]?  \  \  }}}|j                  j                  |      s% |
||      s|j                  |        P R t        |	|      D ]  \  \  }}}|j                   r[|j                  |j                                |D ]6  }|j                  |j                  t!        j"                  |      |             8 p|j                  |j                  t!        j"                  |      |              ||fS c c}w )a  Convert paths to iso-oriented shapes or images. The semantic type of path is either table/text style or 
        vector graphic. This method is to:
        * detect svg regions -> exist at least one non-iso-oriented path
        * convert svg to bitmap by clipping page
        * convert the rest paths to iso-oriented shapes for further table/text style parsing

        Args:
            min_svg_gap_dx (float): Merge svg if the horizontal gap is less than this value.
            min_svg_gap_dy (float): Merge svg if the vertical gap is less than this value.
            min_w (float): Ignore contours if the bbox width is less than this value.
            min_h (float): Ignore contours if the bbox height is less than this value.
            clip_image_res_ratio (float, optional): Resolution ratio of clipped bitmap. Defaults to 3.0.

        Returns:
            tuple: (list of shape raw dict, list of image raw dict).
        r   contoursc                 r    |D ]2  }t        j                  |      j                  | j                        s2 y y)NTF)r   r   containsr   )r   r1   r   s      r   contained_in_inner_contoursz?Paths.to_shapes_and_images.<locals>.contained_in_inner_contoursn   s2      D99T?++DII6tDr   )r    r(   r)   r   r   page_enginedetect_svg_contoursr   listr
   r   zipr   r   r   clip_page_to_dictr   r   )r   r+   r,   r-   r.   r/   
iso_shapesimagesiegroupsr4   _group_pathsr   r   inner_bboxespathssvg_bboxs                     r   to_shapes_and_imageszPaths.to_shapes_and_imagesL   s   & 
dnn./r>! T[[445''uU	T 	D 	 )//1uw//OO 	D/26;/G +$|e99''-6t\JELLY]L^	 ,/v{+C 		[' T<%$$!!%//"34 , cHMM""6"6tyy7JL`"abc
 b22499T?DXYZ		[ 6!!' 0s   ;F#N)   rD   r   r   g      @)__name__
__module____qualname____doc__r7   r   r   r   propertyr    r#   r)   floatrC    r   r   r
   r
      sx     	4 	    
 RTY\;"% ;"5 ;"&+;"5:;"SX;"r   r
   )
rH   r   image.ImagesExtractorr   common.sharer   common.Collectionr   r   r
   rK   r   r   <module>rO      s)     3 ' + s"J s"r   