o
    Ci5                     @   s   d dl Zd dlZd dlZd dlZd dlmZ dd Zdd Zddd	Z	dd
dZ
dd Zdd Z	d ddZdd Zdd ZG dd dZG dd deZG dd dZG dd dZdS )!    N)
ThreadPoolc                    sB    j \}}j ||fksJ t fddt|D }| j S )z< computes the intersection measure of two result tables
    c                 3   s&    | ]}t  | | jV  qd S N)npintersect1dsize).0iI1I2 p/var/www/html/fyndo/python/python_agents/rag_suite/venv/lib/python3.10/site-packages/faiss/contrib/evaluation.py	<genexpr>   s
    
z+knn_intersection_measure.<locals>.<genexpr>)shapesumranger   )r
   r   nqrankninterr   r	   r   knn_intersection_measure   s   

r   c                 C   sh   | j d }||k }t| }t|D ]}|| || | | |d     ||d < q||| || fS )z select a set of results    )r   r   
zeros_liker   r   )limsDIthreshr   masknew_limsr   r   r   r   filter_range_results   s   

.r   overallc                    s   fdd fddj d }j d |ksJ tj|ddfdd	}td
}||t| tdd dd  dd dd  |dS )zucompute the precision and recall of range search results. The
    function does not take the distances into account. c                        |  | d   S Nr   r   r   Ireflims_refr   r   ref_result_for,      z range_PR.<locals>.ref_result_forc                    r    r!   r   r"   )Inewlims_newr   r   new_result_for/   r'   z range_PR.<locals>.new_result_forr   int64dtypec                    s,   | } | }t ||}t|| < d S r   )r   r   len)qgt_idsnew_idsinter)r*   r   r&   r   r   compute_PR_for7   s   z range_PR.<locals>.compute_PR_for   Nmode)r   r   zerosr   mapr   counts_to_PR)r%   r$   r)   r(   r7   r   r3   poolr   )r(   r$   r)   r%   r*   r   r&   r   range_PR(   s   
r<   c           	      C   s   |dkr9|   |  |  } }}|dkr|| }nd}| dkr)||  }||fS |dkr3d}||fS d}||fS |dkrw| dk}d| |< ||  }|| dkt||< |dk}t|| dkscJ d||< d||< || }| | fS t )z computes a  precision-recall for a ser of queries.
    ngt = nb of GT results per query
    nres = nb of found results per query
    ninter = nb of correct results per query (smaller than nres of course)
    r   r         ?        averager   )r   astypefloatr   allmeanAssertionError)	ngtnresr   r7   	precisionrecallr   recalls
precisionsr   r   r   r:   P   s2   
r:   c                 C   s   t |}t |}t| d }t|D ]-}| | | |d  }}||| }	||| }
|
 }|	| |||< |
| |||< q||fS )z& sort 2 arrays using the first as key r   )r   
empty_liker.   r   argsort)r   r   r   r   D2r   r   l0l1iidior   r   r   sort_range_res_2~   s   

rS   c                 C   sb   t |}t| d }t|D ]}| | | |d  }}||| |||< |||   q|S r!   )r   rK   r.   r   sort)r   r   r   r   r   rN   rO   r   r   r   sort_range_res_1   s   
rU   ref,newc                    s"  d|v r	t d|v rt \ fdd fddjd }jd |ks2J t}	tj||	dfd	d
fdd}
td}||
t| t|	}t|	}t|	D ])}t	dd|df dd|df dd|df |d\}}|||< |||< qc||fS )z compute precision-recall values for range search results
    for several thresholds on the "new" results.
    This is to plot PR curves
    refnewc                    r    r!   r   r"   r#   r   r   r&      r'   z4range_PR_multiple_thresholds.<locals>.ref_result_forc                    s.   |  | d  }}||  || fS r!   r   )r   rN   rO   )Dnewr(   r)   r   r   r*      s   z4range_PR_multiple_thresholds.<locals>.new_result_forr      r+   r,   c                    s   | }| \}}t | | d d df< |jdkrd S t|}| | d d df< |jdkr2d S t||}d||t |k< t|| |k}tdg|f}||  | d d df< d S )Nr   r   r5      )r.   r   r   searchsortedcumsumhstack)r/   r0   res_idsres_disrF   rP   n_ok)countsr*   r&   
thresholdsr   r   r3      s   

z4range_PR_multiple_thresholds.<locals>.compute_PR_forr4   Nr   r[   r6   )
rU   rS   r   r.   r   r8   r   r9   r   r:   )r%   r$   r)   rY   r(   rc   r7   do_sortr   ntr3   r;   rJ   rI   tprr   )	rY   r(   r$   rb   r)   r%   r*   r&   rc   r   range_PR_multiple_thresholds   s.   



0

ri   c           	   	   C   s   t jj| |dd t }tt|D ]=}t || || kr!q| |df }t | D ]#}||kr3q,| |ddf |k}|	t
|||f t
|||f  q,qdS )z: test that knn search results are identical, raise if not    decimalr5   N)r   testingassert_array_almost_equalunittestTestCaser   r.   rB   uniqueassertEqualset)	Drefr$   rY   r(   testcaser   skip_disdisr   r   r   r   test_ref_knn_with_draws   s   &rx   c                 C   s   t j| | t| d }t|D ]O}| | | |d  }}	|||	 }
|||	 }|||	 }|||	 }t |
|kr>ndd }||
|\}
}|||\}}t j|
| t jj||dd qdS )zM compare range search results wrt. a reference result,
    throw if it fails r   c                 S   s   |   }| | || fS r   )rL   )r   r   rR   r   r   r   sort_by_ids  s   z+test_ref_range_results.<locals>.sort_by_idsrj   rk   N)r   rm   assert_array_equalr.   r   rB   rn   )r%   rt   r$   r)   rY   r(   r   r   rN   rO   Ii_refIi_newDi_refDi_newry   r   r   r   test_ref_range_results   s    r   c                   @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )OperatingPointszw
    Manages a set of search parameters with associated performance and time.
    Keeps the Pareto optimal points.
    c                 C   s   g | _ g | _d S r   )operating_pointssuboptimal_pointsselfr   r   r   __init__  s   
zOperatingPoints.__init__c                 C      t )z1 return -1 if k1 > k2, 1 if k2 > k1, 0 otherwise NotImplementedr   k1k2r   r   r   compare_keys      zOperatingPoints.compare_keysc                 C   r   )zC parameters to say we do noting, takes 0 time and has 0 performancer   r   r   r   r   do_nothing_key$  r   zOperatingPoints.do_nothing_keyc                 C   s,   | j D ]\}}}||kr||kr dS qdS )NFT)r   )r   perf_newt_new_perfrf   r   r   r   is_pareto_optimal(  s
   z!OperatingPoints.is_pareto_optimalc                 C   s\   d}d}| j | j D ]\}}}| ||}|dkr||kr|}|dk r)||k r)|}q
||fS )z, predicts the bound on time and performance r>   r=   r   )r   r   r   )r   keymin_timemax_perfkey2r   rf   cmpr   r   r   predict_bounds.  s   zOperatingPoints.predict_boundsc                 C   s   |  |\}}| ||S r   )r   r   )r   r   r   r   r   r   r   should_run_experiment<  s   z%OperatingPoints.should_run_experimentc                 C   s   |  ||r@d}|t| jk r5| j| \}}}||kr*||k r*| j| j| n|d7 }|t| jk s| j|||f dS | j|||f dS )Nr   r   TF)r   r.   r   r   appendpop)r   r   r   rf   r   op_Lsperf2t2r   r   r   add_operating_point@  s   
z#OperatingPoints.add_operating_pointN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r   r     s    r   c                   @   sP   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd ZdS )OperatingPointsWithRangesz
    Set of parameters that are each picked from a discrete range of values.
    An increase of each parameter is assumed to make the operation slower
    and more accurate.
    A key = int array of indices in the ordered set of parameters.
    c                 C   s   t |  g | _d S r   )r   r   rangesr   r   r   r   r   Z  s   

z"OperatingPointsWithRanges.__init__c                 C   s   | j ||f d S r   )r   r   )r   namevaluesr   r   r   	add_range_     z#OperatingPointsWithRanges.add_rangec                 C   s(   t ||kr	dS t ||krdS dS )Nr   r5   r   )r   rB   r   r   r   r   r   b  s
   z&OperatingPointsWithRanges.compare_keysc                 C   s   t jt| jtdS )Nr,   )r   r8   r.   r   intr   r   r   r   r   i  r   z(OperatingPointsWithRanges.do_nothing_keyc                 C   s   t dd | jD S )Nc                 S   s   g | ]\}}t |qS r   )r.   )r   r   r   r   r   r   
<listcomp>m      z=OperatingPointsWithRanges.num_experiments.<locals>.<listcomp>)r   prodr   r   r   r   r   num_experimentsl  s   z)OperatingPointsWithRanges.num_experimentsc                 C   sX   t jt| jtd}t| jD ]\}\}}|t| ||< |t| }q|dks*J |S )z/Convert a sequential experiment number to a keyr,   r   )r   r8   r.   r   r   	enumerate)r   cnokr   r   r   r   r   r   
cno_to_keyo  s   z$OperatingPointsWithRanges.cno_to_keyc                    s    fddt | jD S )z3Convert a key to a dictionary with parameter valuesc                    s"   i | ]\}\}}|| |  qS r   r   )r   r   r   r   r   r   r   
<dictcomp>z  s    
z<OperatingPointsWithRanges.get_parameters.<locals>.<dictcomp>)r   r   )r   r   r   r   r   get_parametersx  s   
z(OperatingPointsWithRanges.get_parametersc                    sL   | j D ]\}}||kr fdd|D }||dd<  dS qtd| d)z% remove too large values from a rangec                    s   g | ]}| k r|qS r   r   )r   vmax_valr   r   r     r   z<OperatingPointsWithRanges.restrict_range.<locals>.<listcomp>Nz
parameter z
 not found)r   RuntimeError)r   r   r   name2r   val2r   r   r   restrict_range  s   z(OperatingPointsWithRanges.restrict_rangeN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   R  s    	r   c                   @   s   e Zd Zdd Zdd ZdS )	TimerIterc                 C   s2   g | _ |j| _|| _|jdkrt|j d S d S )Nr   )tsrunstimerre   faissomp_set_num_threads)r   r   r   r   r   r     s   
zTimerIter.__init__c                 C   s   | j }|  jd8  _| jt  t| jdkr#| jd | jd  nd}| jdks/||jkrf|jdkr:t	|j
 t| j}|dd  |d d  }t||jkr]||jd  |_t|d d  |_td S )Nr   r[   r5   r   )r   r   r   r   timer.   max_secsre   r   r   remember_ntr   arraywarmuptimesStopIteration)r   r   
total_timer   r   r   r   r   __next__  s   &
zTimerIter.__next__N)r   r   r   r   r   r   r   r   r   r     s    r   c                   @   sD   e Zd ZdZdddejfddZdd Zd	d
 Zdd Z	dd Z
dS )RepeatTimeru!  
    This is yet another timer object. It is adapted to Faiss by
    taking a number of openmp threads to set on input. It should be called
    in an explicit loop as:

    timer = RepeatTimer(warmup=1, nt=1, runs=6)

    for _ in timer:
        # perform operation

    print(f"time={timer.get_ms():.1f} ± {timer.get_ms_std():.1f} ms")

    the same timer can be re-used. In that case it is reset each time it
    enters a loop. It focuses on ms-scale times because for second scale
    it's usually less relevant to repeat the operation.
    r   r5   r   c                 C   s2   ||k sJ || _ || _|| _|| _t | _d S r   )r   re   r   r   r   omp_get_max_threadsr   )r   r   re   r   r   r   r   r   r     s   zRepeatTimer.__init__c                 C   s   t | S r   )r   r   r   r   r   __iter__  s   zRepeatTimer.__iter__c                 C   s   t | jd S )N  )r   rC   r   r   r   r   r   ms  s   zRepeatTimer.msc                 C   s"   t | jdkrt| jd S dS )Nr   r   r>   )r.   r   r   stdr   r   r   r   ms_std  s   "zRepeatTimer.ms_stdc                 C   s
   t | jS )zJ effective number of runs (may be lower than runs - warmup due to timeout))r.   r   r   r   r   r   nruns  s   
zRepeatTimer.nrunsN)r   r   r   r   r   infr   r   r   r   r   r   r   r   r   r     s    r   )r   )r   rV   )numpyr   ro   r   r   multiprocessing.poolr   r   r   r<   r:   rS   rU   ri   rx   r   r   r   r   r   r   r   r   r   <module>   s&   


(.
N?: