
    kh                     J    d Z ddlZddlmZ ddlmZ ddlmZ  G d d	e      Zy)
z8Collection of :py:class:`~pdf2docx.page.Page` instances.    N   )RawPageFactory   )BaseCollection)Fontsc                   ,    e Zd ZdZd Zedefd       Zy)PageszA collection of ``Page``.c                 v   t        j                  |      }g g }}d}| D ]  }|j                  rt        j                  ||j
                     d      } |j                  di | |s|j                  j                         rd} |j                  di | |j                  |       |j                  |_        |j                  |_        |j                  j                         j                  |j                   j"                         |j%                  |       |j%                  |        |st'        j(                  d       t*        j-                  |      \  }	}
t/        ||      D ]R  \  }} |j0                  di |}|x|_        |_         |j4                  di |}|j6                  j                  |       T y)zAnalyze document structure, e.g. page section, header, footer.

        Args:
            fitz_doc (fitz.Document): ``PyMuPDF`` Document instance.
            settings (dict): Parsing parameters.
        FPyMuPDF)page_enginebackendTzFWords count: 0. It might be a scanned pdf, which is not supported yet.N )r   extractskip_parsingr   createidrestoreraw_textstripclean_upprocess_fontwidthheightfloat_imagesresetextendblocksfloating_image_blocksappendloggingwarningr	   _parse_documentzipcalculate_marginmarginparse_sectionsections)selffitz_docsettingsfontspages	raw_pageswords_foundpageraw_pageheaderfooterr%   r'   s                O/var/www/teggl/fontify/venv/lib/python3.12/site-packages/pdf2docx/page/Pages.pyparsezPages.parse   s    h'
 ry 	D  ( &,,$''9JT]^HH(x( 8#4#4#:#:#<" H)) !!%( "DJ"//DK##%,,X__-R-RSX&LL1	6 OOde ..y9 "%3 	+ND(.X..::F,22HOdk .x--99HMM  *	+    r-   c                      y)z<Parse structure in document/pages level, e.g. header, footer) r7   r   )r-   s    r3   r"   zPages._parse_documentV   s     r5   N)__name__
__module____qualname____doc__r4   staticmethodlistr"   r   r5   r3   r	   r	      s'    #D+N $  r5   r	   )r;   r    r   common.Collectionr   
font.Fontsr   r	   r   r5   r3   <module>r@      s$    ?  * . NN Nr5   