o
    Ci(                     @   sp  U d Z ddlZddlmZmZmZmZmZmZm	Z	 ddl
mZmZmZ daeed< daeed< g aee ed	< d
Zeed< G dd deZ			d-de	eedf de	eedf de	eee df deeeee f fddZdee dee dee fddZdee defddZdedeee ee f deee ee f deee ee f dee	eeeef f eeef eee f deed f d!ed"ed#eeeeeeegdf  d$edeeeee ee f fd%d&Zded'ee	eef  d(ee d)ee dee	eeeef f eeef eee f deed f d!ed"ed*ed#eeeeeeegdf  deeef fd+d,ZdS ).ze
Code related to text extraction.

Some parts are still in _page.py. In doubt, they will stay there.
    N)AnyCallableDictListOptionalTupleUnion   )DictionaryObjectTextStringObjectencode_pdfdocencodingCUSTOM_RTL_MINCUSTOM_RTL_MAXCUSTOM_RTL_SPECIAL_CHARS    LAYOUT_NEW_BT_GROUP_SPACE_WIDTHSc                   @   s   e Zd ZdS )OrientationNotFoundErrorN)__name__
__module____qualname__ r   r   w/var/www/html/fyndo/python/python_agents/rag_suite/venv/lib/python3.10/site-packages/pypdf/_text_extraction/__init__.pyr      s    r   _min_maxspecialsreturnc                 C   sv   t | tr| an	t | trt| at |tr|an	t |tr"t|at |tr/dd |D ant |tr6|atttfS )a  
    Change the Right-To-Left and special characters custom parameters.

    Args:
        _min: The new minimum value for the range of custom characters that
            will be written right to left.
            If set to ``None``, the value will not be changed.
            If set to an integer or string, it will be converted to its ASCII code.
            The default value is -1, which sets no additional range to be converted.
        _max: The new maximum value for the range of custom characters that will
            be written right to left.
            If set to ``None``, the value will not be changed.
            If set to an integer or string, it will be converted to its ASCII code.
            The default value is -1, which sets no additional range to be converted.
        specials: The new list of special characters to be inserted in the
            current insertion order.
            If set to ``None``, the current value will not be changed.
            If set to a string, it will be converted to a list of ASCII codes.
            The default value is an empty list.

    Returns:
        A tuple containing the new values for ``CUSTOM_RTL_MIN``,
        ``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``.
    c                 S   s   g | ]}t |qS r   )ord.0xr   r   r   
<listcomp>=   s    z"set_custom_rtl.<locals>.<listcomp>)
isinstanceintr   strr   r   r   list)r   r   r   r   r   r   set_custom_rtl   s   






r&   mnc              	   C   s   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   |d  | d |d  | d |d   |d  gS )Nr      r	         r   r   )r'   r(   r   r   r   multC   s   &&r,   c                 C   s4   | d dkrdS | d dk rdS | d dkrdS dS )	Nr*   gư>r   gư   r)   Z     r   )r'   r   r   r   orientN   s   r0   text	cmtm_prevcmtm_matrix	memo_cmtmcmaporientations.output	font_sizevisitor_text
spacewidthc
                 C   sV  |d }
|d }|d }|d }|d }|d }t ||
}t ||}t|}|d |d  }|d |d  }tt|d |d  t|d |d   }|| }|}
||vrYtz8|dkr|d| k r||  d d	kr|| d	 7 }|d ur|| d	 |||d | d
} n
t||d k rt||	| d kr||  d dkr| d7 } n|dkr|d| kr||  d d	kr|| d	 7 }|d ur|| d	 |||d | d
} nt||d k rt||	| d kr||  d dkr| d7 } n|dkrA|d| kr||  d d	kr|| d	 7 }|d ur|| d	 |||d | d
} nrt||d k r@t||	| d kr@||  d dkr@| d7 } nP|dkr|d| k rp||  d d	kro|| d	 7 }|d urm|| d	 |||d | d
} n!t||d k rt||	| d kr||  d dkr| d7 } W n
 ty   Y nw | }| }
| ||
|fS )Nr   r)   r+   r   r*   r	   gr   
 g333333?    r-   g?r.   r/   )r,   r0   mathsqrtabsr   	Exceptioncopy)r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   cm_prevtm_prev	cm_matrix	tm_matrixmemo_cmmemo_tmm_prevr'   orientationdelta_xdelta_ykfr   r   r   crlf_space_checkY   s   

.



rP   operandsrF   rG   rtl_dirc
                    sR  t ||}
t|
}||v r%t|dkr%t|d tr&| |d 7 } | |fS d}t|d tr5t|d n|d }t d trbz
| d d}W n# tya   | d dkr[dndd}Y nw d fdd|D } fdd|D D ]}t|d	krt	|}nd	}|d
ksd|  krdksn d|  krdksn d|  krdksn |t
v r|r||  n| | } qwd|  krdksn d|  krdksn d|  krdksn t|  krtkrn n|sd}|| 7 }|	d ur|	| || d | d} ||  } qw|r d}|| 7 }|	d ur|	| || d | d} | | } qw| |fS )Nr   r<   surrogatepasscharmapz	utf-16-bec                    s2   g | ]}| d  v r d  | nt |f qS )r   )bytesdecoder   r5   r   r   r!      s   2 zhandle_tj.<locals>.<listcomp>c                    s(   g | ]}| d  v r d  | n|qS )r)   r   r   rW   r   r   r!      s   ( r)   /   :   @   i    io   i   i!  i  i  i  i  ip  i  Tr*   F)r,   r0   lenr"   r$   r   rV   rB   joinr   r   r   r   )r1   rQ   rF   rG   r5   r6   r7   r8   rR   r9   r'   rK   tttr    xxr   rW   r   	handle_tj   sh   
?	



r`   )NNN)__doc__r?   typingr   r   r   r   r   r   r   genericr
   r   r   r   r#   __annotations__r   r   r   rB   r   r$   r&   floatr,   r0   rP   boolr`   r   r   r   r   <module>   s    $
"-&
	

s&
	

