
    Ai                     X   d dl Z d dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZmZmZmZmZmZmZ d dlmZ  ee
          Zdedej        fd	Zd
edee         defdZdedeeef         defdZdededee         fdZdefdZedk    r e            Z  e!de             dS dS )    N)ListDict)OpenAI)
OPENAI_API_KEYEMBED_MODEL	LLM_MODEL	INDEX_DIRARTIFACTS_DIR
CATEGORIESTOP_K_PER_CATEGORYMAX_FINDINGS_PER_CATEGORYRUN_ID	INPUT_DIR)
FaissStore)api_keyqueryreturnc                     t           j                            t          | g          }t	          j        |j        d         j        gd          S )z.Convert a text query into an embedding vector.)modelinputr   float32)dtype)client
embeddingscreater   nparraydata	embedding)r   resps     DC:\Users\Terasoftware\OneDrive\Desktop\Graph Rag\rag_suite\rag\qa.pyembed_queryr"      sJ    ##g $  D 8TYq\+,I>>>>    questionsearch_resultsc           	      h   |sdS g }|D ]x}|                     di           }|                     dd          }|                     dd          }|                     dd          }|                    d	| d
| d|            yd                    |dd                   }d|  d|dd          d}		 t          j        j                            t          d|	dgdd          }
|
j        d         j	        j
                                        S # t          $ r}dt          |           cY d}~S d}~ww xY w)z\
    Generate an LLM answer based on retrieved chunks.
    Used by main.py ask command.
    z/No relevant information found in the documents.metadatadoc_nameUnknownpage_number?text [z, Page z]
z

N   zUYou are an AI assistant analyzing annual reports and financial documents.

Question: z

Context from documents:
i.  a  

Instructions:
- Provide a clear, factual answer based ONLY on the context above
- Cite sources using [Document Name, Page X] format after each claim
- If the information is not in the context, say "Information not available in the provided documents"
- Be concise but complete

Answer:userrolecontentg?iX  )r   messagestemperature
max_tokensr   zError generating answer: )getappendjoinr   chatcompletionsr   r   choicesmessager3   strip	Exceptionstr)r$   r%   context_partsrmetar(   pager,   contextpromptresponsees               r!   answer_question_with_llmrI   (   s   
  A@@ M C CuuZ$$88J	22xxs++xx##AAA$AA4AABBBBkk-,--G 
 	%  F	4;*11%&99:	 2 
 
 "*288::: 4 4 433q66333333334s   1AD 
D1D,&D1,D1evidence	chunk_mapc                 v   |                     |                      d                    }|sdS |                      dd                                          }|sdS ||d                                         vrdS |                      d          |d         k    rdS |                      d          |d         k    rdS d	S )
z
    Validate that evidence actually exists in the retrieved chunks.
    Checks: chunk_id exists, quote is in text, doc_name matches, page matches.
    chunk_idFquoter-   r,   r(   rD   r*   T)r7   lower)rJ   rK   chunkrN   s       r!   validate_evidencerQ   Y   s    
 MM(,,z2233E uLL"%%++--E uE&M''))) u||J5#44 u||Fu]33 u4r#   categoryc                    t          j        t                    }t          |          }|                    |t
                    }g }i |D ]P}|d         }|d         |d         |d         |d         |d         d}|                    |           ||d         <   Qt          j        	                    t          d	|                                                     d
d                              dd           d          }	t          j        t          d           t          |	dd          5 }
t          j        ||
d           ddd           n# 1 swxY w Y   d|  dt          j        |d           d|  d}	 t$          j        j                            t,          d|dgd          }t          j        |j        d         j        j                  }n:# t          j        t8          f$ r!}t;          d|  d|            g cY d}~S d}~ww xY wg }|                    d g           dt>                   D ]@}
fd!|
                    d"g           D             }|r||
d"<   |                    |
           A|S )#z
    For a given category (e.g., 'Risks', 'Management'), retrieve relevant chunks
    and use LLM to extract structured findings with evidence.
    )kr'   rM   r(   r*   
source_urlr,   )rM   r(   r*   rU   r,   retrieval_bundle_ _/z.jsonTexist_okwutf-8encoding   )indentNzD
You are a pharma-grade annual report extraction engine.

Category: u   

RULES:
- Return VALID JSON ONLY
- Use ONLY the provided chunks
- If unsupported, OMIT the finding
- Each finding must include evidence with doc_name, page, chunk_id, quote (≤25 words)

CHUNKS:
z"

OUTPUT SCHEMA:
{
  "category": "aC  ",
  "findings": [
    {
      "finding": "Short description",
      "severity": "High|Medium|Low",
      "confidence": "High|Medium|Low",
      "evidence": [
        {
          "doc_name": "...",
          "page": 123,
          "chunk_id": "...",
          "quote": "exact quote from text"
        }
      ]
    }
  ]
}
r0   r1   r   )r   r4   r5   u%   ⚠️ Error extracting findings for z: findingsc                 4    g | ]}t          |          |S  )rQ   ).0rH   rK   s     r!   
<listcomp>z1extract_findings_per_category.<locals>.<listcomp>   s9     
 
 
 I..

 
 
r#   rJ   ) r   loadr	   r"   searchr   r8   ospathr9   r
   rO   replacemakedirsopenjsondumpdumpsr   r:   r;   r   r   loadsr<   r=   r3   JSONDecodeErrorr?   printr7   r   )rR   r   storeqvechitsbundlehrC   entrybundle_pathfrF   r    resultrH   rb   valid_evidencerK   s                    @r!   extract_findings_per_categoryr~   u   s   
 OI&&EuD<< 2<33DFI 
, 
,}Z(Z(.|,L
 
 	e&+	$z"## ',,WHNN,,44S#>>FFsCPPWWW K K----	k3	1	1	1 'Q	&!A&&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' '! ! ! F1! !  !! ! !FF	{&--%&99: . 
 

 DLO3;<< ),   EhEE!EEFFF						 HZZ
B''(B)B(BC  
 
 
 
uuZ,,
 
 

  	*AjMOOAOs1   -EEE=AG H-H	H	Hc                  V   g } d}t          d           t          j                    D ]u\  }}t          d| d           t          ||          }|D ]*}d|d|d<   ||d<   |dz  }|                     |           +t          d	t          |           d
           v| D ]e}d |d         D             }t          |          |d<   dddd                    |                    dd          d          }|d|d         z   z  |d<   f|                     d d           d | dd         D             }t          t          t          |           | |d}	t          j        t          d           t          j                            t          d          }
t          |
dd          5 }t!          j        |	|dd !           ddd           n# 1 swxY w Y   t          d"t          |                       t          d#d$                    |                      t          d%|
 d&           |
S )'z
    Generate the master findings.json file by:
    1. Extracting findings for each category
    2. Scoring and ranking findings
    3. Identifying top 10 critical findings
       u*   
🔍 Extracting findings per category...
z  Processing: z...zF-03didrR   u       → Found z	 findingsc                     h | ]
}|d          S )r(   rd   )re   rH   s     r!   	<setcomp>z)generate_findings_json.<locals>.<setcomp>   s    888Q1Z=888r#   rJ   recurrence_count   r`   )HighMediumLowseverityr   scorec                     | d         S )Nr   rd   )xs    r!   <lambda>z(generate_findings_json.<locals>.<lambda>   s
    AgJ r#   T)keyreversec                     g | ]
}|d          S )r   rd   )re   r{   s     r!   rf   z*generate_findings_json.<locals>.<listcomp>   s    555a!D'555r#   N
   )run_idinput_foldertotal_findingsrb   
top_10_idsrZ   zfindings.jsonr\   r]   r^   F)ra   ensure_asciiu   
✅ Total findings: u   ✅ Top 10 critical findings: z, u   ✅ findings.json saved at: 
)rs   r   itemsr~   r8   lenr7   sortr   r   ri   rl   r
   rj   r9   rm   rn   ro   )all_findingsfidrR   r   rb   r{   doc_set	sev_scorer   findings_dataout_paths              r!   generate_findings_jsonr      s    L
C	
8999%+-- 9 9%,x,,,---05AA 	# 	#A$3nnnAdG$AjM1HC""""7s8}}7778888  = =88!J-888 #G
!A66::155U;S;SUVWW	!a(:&;";<'

 ..===55<#4555J !l++   M K----w||M?;;H	hg	.	.	. B!	-15AAAAB B B B B B B B B B B B B B B 

63|#4#4
6
6777	
B499Z+@+@
B
BCCC	
5
5
5
5666Os   "GGG__main__u   ✅ findings.json generated at )"rn   ri   typingr   r   numpyr   openair   
rag.configr   r   r   r	   r
   r   r   r   r   r   rag.vectorstorer   r   r@   ndarrayr"   rI   boolrQ   r~   r   __name__rj   rs   rd   r#   r!   <module>r      s    				                                          ' & & & & &		'	'	'?s ?rz ? ? ? ?+4s +4DJ +43 +4 +4 +4 +4b c4i T    8ZC Z ZT
 Z Z Z Z@2 2 2 2 2j z 4!!##D	E
2D
2
2333334 4r#   