
    kh.                         d Z ddlZddlmZ ddlmZ ddlmZ  G d de      Z G d	 d
e      Z	 G d de      Z
 G d de      Zy)a^  Objects representing PDF stroke and filling extracted from Path.

* Stroke: consider only the horizontal or vertical path segments
* Fill  : bbox of closed path filling area

Hyperlink in ``PyMuPDF`` is represented as uri and its rectangular area (hot-area), while the
applied text isn't extracted explicitly. To reuse the process that identifying applied text of
text style shape (e.g. underline and highlight), hyperlink is also abstracted to be a ``Shape``.

.. note::
    The evident difference of hyperlink shape to text style shape is: the ``type`` of hyperlink 
    shape is determined in advance, while text style shape needs to be identified by the position 
    to associated text blocks.

Above all, the semantic meaning of shape instance may be:

* strike through line of text
* under line of text
* highlight area of text
* table border
* cell shading
* hyperlink

Data structure::

    {
        'type': int,
        'bbox': (x0, y0, x1, y1),
        'color': srgb_value,

        # for Stroke
        'start': (x0, y0),
        'end': (x1, y1),
        'width': float,

        # for Hyperlink
        'uri': str
    }

.. note::
    These coordinates are relative to real page CS since they're extracted from ``page.get_drawings()``,
    which is based on real page CS. So, needn't to multiply Element.ROTATION_MATRIX when initializing
    from source dict.
    N   )Element)RectType)	constantsc                        e Zd ZdZddef fdZed        Zej                  de	fd       Zed        Z
de	fdZde	fd	Zed
        Z fdZdefdZd Zd Z xZS )Shapez Shape object.rawc                     |xs i }|j                  dd      | _        t        |   |j                  dd             d | _        |j                  dd      | _        | j                  | _        y )Ncolorr   bbox)r   r   r   r   type)getr   superupdate_bbox_parent_typedefault_type_potential_typeselfr	   	__class__s     P/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/shape/Shape.py__init__zShape.__init__8   sa    iRWWWa(
 	CGGFF34
 WWVR(
#00    c                     | j                   S Nr   r   s    r   r   z
Shape.typeG   s    ::%r   	rect_typec                 &    |j                   | _        y r   )valuer   r   r    s     r   r   z
Shape.typeJ   s    5>__
r   c                      | j                   dk7  S )z<If the shape type is determined to a basic item of RectType.r   r   r   s    r   is_determinedzShape.is_determinedM   s     zzRr   c                 4    | j                   |j                  k(  S )z3If shape type is equal to the specified one or not.)r   r"   r#   s     r   equal_to_typezShape.equal_to_typeR   s    zz9??**r   c                 4    | j                   |j                  z  S )z:If shape type has a chance to be the specified one or not.)r   r"   r#   s     r   has_potential_typezShape.has_potential_typeV   s    ##ioo55r   c                 .    t        d t        D              S )z"Default semantic type for a shape.c              3   4   K   | ]  }|j                     y wr   )r"   ).0ts     r   	<genexpr>z%Shape.default_type.<locals>.<genexpr>]   s     -q177-s   )sumr   r   s    r   r   zShape.default_typeZ   s     -H---r   c                 t    t         |          }|j                  | j                  | j                  d       |S )N)r   r   )r   storeupdater   r   r   resr   s     r   r1   zShape.store`   s4    gmo

JJZZ
 	 
r   blocksc                    |D ]  }|j                   j                  | j                   j                  k  r1|j                   j                  | j                   j                  kD  r y| j                  |      }|| _        || j
                  k7  s y y)aQ  Determine semantic type based on the position to text blocks. Note the results might be 
        a combination of raw types, e.g. the semantic type of a stroke can be either text strike,
        underline or table border.

        Args:
            blocks (list): A list of ``Line`` instance, sorted in reading order in advance.
        N)r   y1y0_semantic_typer   r   )r   r5   liner    s       r   parse_semantic_typezShape.parse_semantic_typei   ss      	3Dyy||diill*H yy||diill*E ++D1I#,D $+++U	3r   c                     | j                   S )z Check semantic type based on the position to a text line.
            Return all possibilities if can't be determined with this text line.
            Prerequisite: intersection exists between this shape and line.
        )r   )r   r:   s     r   r9   zShape._semantic_type   s    
    r   c                 D    |j                  | j                  ||dd       y)z'Plot rectangle shapes with ``PyMuPDF``.r   T)r   fillwidthoverlayN)	draw_rectr   )r   pager   s      r   plotz
Shape.plot   s    tyyEDQr   r   )__name__
__module____qualname____doc__dictr   propertyr   setterr   r%   r'   r)   r   r1   listr;   r9   rC   __classcell__r   s   @r   r   r   6   s    14 1 % %	[[DXD D   +h +68 6 . .
3 3,!Rr   r   c                        e Zd ZdZddef fdZed        Zed        Zed        Z	ed        Z
ed        Zed	        Z fd
Zed        Zd Z fdZd Z xZS )Strokez Horizontal or vertical stroke of a path. 
        The semantic meaning may be table border, or text style line like underline and strike-through.
    r	   c                 :   |xs i }t        j                  |j                  dd            | _        t        j                  |j                  dd            | _        | j                  j
                  | j                  j
                  kD  s-| j                  j                  | j                  j                  kD  r#| j                  | j                  c| _        | _        t        | !  |       |j                  dd      | _	        t        | )  | j                                y )Nstart)        rR   endr?   rR   )fitzPointr   _start_endxyr   r   r?   r   _to_rectr   s     r   r   zStroke.__init__   s    iRjj*!=>JJswwuj9:	;;==499;;&$++--$))++*E%)YY"DK 	WWWc*
 	DMMO,r   c                 X    t        | j                  d   | j                  d   z
        dk  S )N   MbP?absrV   rW   r   s    r   
horizontalzStroke.horizontal   s'    !$T[[^DIIaL%@!A$!FFr   c                 X    t        | j                  d   | j                  d   z
        dk  S )Nr   r]   r^   r   s    r   verticalzStroke.vertical   s'    "4;;q>$))A,#>?DDr   c                 .    | j                   j                  S r   )rV   rX   r   s    r   x0z	Stroke.x0       &r   c                 .    | j                   j                  S r   )rW   rX   r   s    r   x1z	Stroke.x1       $r   c                 .    | j                   j                  S r   )rV   rY   r   s    r   r8   z	Stroke.y0   re   r   c                 .    | j                   j                  S r   )rW   rY   r   s    r   r7   z	Stroke.y1   rh   r   c                    t        j                  |      }|j                         dk(  rYt        j                  |dd       | _        t        j                  |dd       | _        t        |   | j                                | S t        |   |       |j                  |j                  k\  rh|j                  |j                  z   dz  }t        j                  |j                  |      | _        t        j                  |j                  |      | _        | S |j                  |j                  z   dz  }t        j                  ||j                        | _        t        j                  ||j                        | _        | S )a!  Update stroke bbox (related to real page CS).

        * Update start/end points if ``rect.area==0``.
        * Ppdate bbox directly if ``rect.area!=0``.

        Args:
            rect (fitz.Rect, tuple): ``(x0, y0, x1, y1)`` like data.

        Returns:
            Stroke: self
        rR   r   r   N       @)rT   Rectget_arearU   rV   rW   r   r   rZ   r?   heightr8   r7   rd   rg   )r   rectrY   rX   r   s       r   r   zStroke.update_bbox   s    yy ==?C**T!AY/DK

48,DIG0$  G% zzT[[(WWTWW_c)"jj!4"jj!4	 	 WWTWW_c)"jjDGG4"jjDGG4	r   c                     t         j                  j                  t         j                  j                  z  t         j                  j                  z  S )zTDefault semantic type for a Stroke shape: table border, underline or strike-through.)r   BORDERr"   	UNDERLINESTRIKEr   s    r   r   zStroke.default_type   s4     $$x'9'9'?'??(//BWBWWWr   c                    | j                  d      }|j                  j                  |      s| j                  S | j                  }|j
                  }||k7  r| j                  S |r,|j                  j                  |j                  j                  fn+|j                  j                  |j                  j                  f\  }}|r,| j                  j                  | j                  j                  fn+| j                  j                  | j                  j                  f\  }}||dz
  k\  r=||dz   k  r5t        j                  j                  t        j                  j                  z  S t        j                  j                  S )a7  Override. Check semantic type of a Stroke: table border v.s. text style line, e.g. underline 
        and strike-through. It's potentially a text style line when:

        * the stroke and the text line has same orientation; and
        * the stroke never exceeds the text line along the main direction
        rl   r\   )get_expand_bboxr   
intersectsr   r`   is_horizontal_textrd   rg   r8   r7   r   rt   r"   rs   rr   )	r   r:   expanded_shapeh_shapeh_lineline_x0line_x1shape_x0shape_x1s	            r   r9   zStroke._semantic_type   s!    --c2yy##N3$$$ //((f$$$ <BDIILL$)),,7		VZV_V_VbVbGc=DdiillDIILL9499<<Y]YbYbYeYeJf(WQY8WQY#6??((8+=+=+C+CCC??(((r   c                     t         |          }|j                  t        | j                        t        | j
                        | j                  d       |S )N)rQ   rS   r?   )r   r1   r2   tuplerV   rW   r?   r3   s     r   r1   zStroke.store   sE    gmo

4;;'#ZZ
 	
 
r   c                     | j                   dz  }| j                  \  }}| j                  \  }}||z
  ||z
  ||z   ||z   fS )z&Convert centerline to rectangle shape.rl   )r?   rV   rW   )r   hrd   r8   rg   r7   s         r   rZ   zStroke._to_rect  sI    JJBB1bdBqD"Q$''r   r   )rD   rE   rF   rG   rH   r   rI   r`   rb   rd   rg   r8   r7   r   r   r9   r1   rZ   rL   rM   s   @r   rO   rO      s    -4 -" F FD D& &$ $& &$ $$L X X)6(r   rO   c                   2    e Zd ZdZdefdZed        Zd Zy)Fillz Rectangular (bbox) filling area of a closed path. 
        The semantic meaning may be table shading, or text style like highlight.
    max_border_widthc                     t        | j                  j                  | j                  j                        }||kD  ryt	        || j
                  d      j                  | j                        S )a  Convert to Stroke instance based on width criterion.

        Args:
            max_border_width (float): Stroke width must less than this value.

        Returns:
            Stroke: Stroke instance.
        
        .. note::
            A Fill from shape point of view may be a Stroke from content point of view.
            The criterion here is whether the width is smaller than defined ``max_border_width``.
        N)r?   r   )minr   r?   ro   rO   r   r   )r   r   ws      r   	to_strokezFill.to_stroke  sS     		!1!12 A

;<HHSSr   c                 l    t         j                  j                  t         j                  j                  z  S )zHDefault semantic type for a Fill shape: table shading or text highlight.)r   SHADINGr"   	HIGHLIGHTr   s    r   r   zFill.default_type'  s'     %%(:(:(@(@@@r   c                 B   | j                   j                  | j                   j                  kD  }|r| j                   j                  n| j                   j                  }|j                  }||k7  r| j                  S | j                  |t        j                        s| j                  S |r|j                   j                  n|j                   j                  }||dt        j                  z  z   k  rt        j                  j                  S t        j                  j                  S )a  Override. Check semantic type based on the position to a text line. Along the main dimension,
        text highlight never exceeds text line.

        Args:
            line (Line): A text line.

        Returns:
            RectType: Semantic type of this shape.
        
        .. note::
            Generally, table shading always contains at least one line, while text highlight never
            contains any lines. But in real cases, with margin exists, table shading may not 100% 
            contain a line.
        )	thresholdr   )r   r?   ro   rx   r   get_main_bboxr   FACTOR_MAJOR
MINOR_DISTr   r   r"   r   )r   r:   rz   w_shaper{   w_lines         r   r9   zFill._semantic_type,  s      ))//$))"2"22%,$))//$))2B2B ((f$$$!!$)2H2H!I$$$$*		0@0@fq!5!5555%%+++##)))r   N)	rD   rE   rF   rG   floatr   rI   r   r9    r   r   r   r     s0    T T, A A*r   r   c                   R     e Zd ZdZddef fdZ fdZed        Zdde	fdZ
 xZS )		HyperlinkaX  Rectangular area, i.e. ``hot area`` for a hyperlink. 
    
    Hyperlink in ``PyMuPDF`` is represented as uri and its hot area, while the applied text isn't extracted 
    explicitly. To reuse the process that identifying applied text of text style shape (e.g. underline and 
    highlight), hyperlink is also abstracted to be a ``Shape``.
    r	   c                 R    t         |   |       |j                  dd      | _        y)zFInitialize from raw dict. Note the type must be determined in advance.uri N)r   r   r   r   r   s     r   r   zHyperlink.__init__V  s$     775"%r   c                 ^    t         |          }|j                  d| j                  i       |S )Nr   )r   r1   r2   r   r3   s     r   r1   zHyperlink.store^  s/    gmo

488
 	 
r   c                 6    t         j                  j                  S )z8Default semantic type for a Hyperlink: always hyperlink.)r   	HYPERLINKr"   r   s    r   r   zHyperlink.default_typef  s     !!'''r   r5   c                 &    | j                   | _        y)zLSemantic type of Hyperlink shape is determined, i.e. ``RectType.HYPERLINK``.N)r   r   )r   r5   s     r   r;   zHyperlink.parse_semantic_typek  s    #00r   r   )rD   rE   rF   rG   rH   r   r1   rI   r   rK   r;   rL   rM   s   @r   r   r   N  s9    &4 & ( (1 1r   r   )rG   rT   common.Elementr   common.sharer   commonr   r   rO   r   r   r   r   r   <module>r      sR   +Z  $ # SRG SRl}(U }(@?*5 ?*D1 1r   