TO CHECK - description chunker implementation

2025-04-13 09:33:02 +02:00
parent 987a6b938f
commit 766c1ec4ca
2 changed files with 35 additions and 48 deletions
--- a/services/advisor.py
+++ b/services/advisor.py
@ -89,43 +89,45 @@ class Advisor:
        prompt = ChatPromptTemplate.from_template(
            """You are an experienced compliance analyst at a prestigious private bank. Your role is to carefully examine client applications by cross-checking data from four provided documents: Passport, Profile, Description, and Account.

-        Your task:
-        Determine whether to ACCEPT or REJECT the client’s private banking application based on data consistency.
+    Your task:
+    Determine whether to ACCEPT or REJECT the client’s private banking application based on data consistency.

-        CRITICAL RULES for rejection (any single issue means rejection):
-        - Mismatch in personal details: names, surnames must match exactly across all documents.
-        - Typos or spelling errors in critical information.
-        - Expired or incorrect validity dates on passports or other documents.
-        - Non-existent or incorrect addresses, including city, street, zip code, and country.
-        - Conflicting information regarding country of domicile.
-        - Suspicious or implausible personal details.
-        - Financial discrepancies between Profile and Description documents.
-        - Mismatching nationality between Passport and Account documents.
+    CRITICAL RULES for rejection (any single issue means rejection):
+    - Mismatch in personal details: names, surnames must match exactly across all documents.
+    - Typos or spelling errors in critical information.
+    - Expired or incorrect validity dates on passports or other documents.
+    - Non-existent or incorrect addresses, including city, street, zip code, and country.
+    - Conflicting information regarding country of domicile.
+    - Suspicious or implausible personal details.
+    - Financial discrepancies between Profile and Description documents.
+    - Mismatching nationality between Passport and Account documents.

-        ADDITIONAL INSTRUCTIONS:
-        - Cross-check the Profile, Description, and Account information meticulously against the Passport.
-        - Historical occupation details may legitimately differ from current data—this alone does not imply inconsistency.
-        - Always verify currency consistency when evaluating monetary amounts.
-        - Be extremely cautious—reject immediately if there's any uncertainty or if any detail appears suspicious.
-        - NEVER fabricate or assume information; rely strictly on provided data.
+    ADDITIONAL INSTRUCTIONS:
+    - Cross-check the Profile, Description, and Account information meticulously against the Passport.
+    - Historical occupation details may legitimately differ from current data—this alone does not imply inconsistency.
+    - Always verify currency consistency when evaluating monetary amounts.
+    - Be extremely cautious—reject immediately if there's any uncertainty or if any detail appears suspicious.
+    - NEVER fabricate or assume information; rely strictly on provided data.

-        RESPOND STRICTLY IN THE FOLLOWING JSON FORMAT:
-        {format_instructions}
+    RESPOND STRICTLY IN THE FOLLOWING JSON FORMAT:
+    {format_instructions}

-        ---
+    ---

-        **Passport Document:**
-        {passport}
+    **Passport Document:**
+    {passport}

-        **Profile Document:**
-        {profile}
+    **Profile Document:**
+    {profile}

-        **Description Document:**
-        {description}
+    **Description Document:**
+    The following is a single physical document, structured into multiple thematic sections. Each section starts with a heading followed by relevant content.

-        **Account Document:**
-        {account}
-        """
+    {description}
+
+    **Account Document:**
+    {account}
+    """
        )

        # 4. Chaîne LLM
--- a/services/extractor.py
+++ b/services/extractor.py
@ -13,25 +13,10 @@ from validation.from_passport import FromPassport
 from validation.from_profile import FromProfile
 from validation.from_description import FromDescription

-def extract_description(client_data: dict[str, Any]) -> FromDescription:
-    passport_data = client_data.get("description")
-
-    prompt_template = (
-        "Extract the following information from the provided passport text.\n"
-        "Return only JSON matching this format:\n{format_instructions}\n\n"
-        "Pay special attention to the passport number\n"
-        "Passport text:\n{processed_text}"
-    )
-
-
-    result = __run_extraction_chain(
-        raw_file_data=passport_data,
-        file_processor=process_description,
-        pydantic_model=FromDescription,
-        prompt_template=prompt_template,
-    )
-
-    return result
+def extract_description(client_data: dict[str, Any]) -> dict[str, str]:
+    description_data = client_data.get("description")
+    parsed_data = process_description(description_data)
+    return parsed_data

 def extract_account(client_data: dict[str, Any])-> FromAccount:
    account_data = client_data.get("account")