Rename run_extraction_chain to __run_extraction_chain and add extract_profile

2025-04-12 16:58:41 +02:00
parent 15cfd2f9b6
commit 13b7419db8
2 changed files with 35 additions and 35 deletions
--- a/app.py
+++ b/app.py
@ -1,12 +1,10 @@
-from dto.requests import GameStartRequestDTO
-from services.extractor import run_extraction_chain
-from services.julius_baer_api_client import JuliusBaerApiClient
-from validation.from_passport import FromPassport
-
-from services.player import Player
-from utils.parsers import process_passport
 from flask import Flask
+
 import config
+from dto.requests import GameStartRequestDTO
+from services.extractor import extract_profile
+from services.julius_baer_api_client import JuliusBaerApiClient
+from services.player import Player

 app = Flask(__name__)

@ -15,37 +13,15 @@ app = Flask(__name__)
 def hello_world():
    return 'Hello World!'

+
 if __name__ == '__main__':
    jb_client = JuliusBaerApiClient()
    game_start_request = GameStartRequestDTO(player_name=config.API_TEAM)
    res = jb_client.start_game(game_start_request)

-
-    passport_data = res.client_data.get("passport")
-
-    prompt_template = (
-        "Extract the following information from the provided passport text.\n"
-        "Return only JSON matching this format:\n{format_instructions}\n\n"
-        "Pay special attention to the passport number and signature.\n"
-        "Passport text:\n{processed_text}"
-    )
-
-    result = run_extraction_chain(
-        raw_file_data=passport_data,
-        file_processor=process_passport,
-        pydantic_model=FromPassport,
-        prompt_template=prompt_template,
-    )
-
-    print(result)
+    result = extract_profile(res.client_data)

    player = Player()
    player.play_on_separate_thread()

    app.run()
-
-    # res.session_id
-    # UUID('fde19363-a3d5-432e-8b87-54a6dd54f0dd')
-    # second test UUID('e3d58302-400a-4bc6-9772-ae50de43c9f4')
-    # UUID('f8b2a0a6-d4e0-45e6-900f-8ecb3c28f993')
-    # UUID('f8b2a0a6-d4e0-45e6-900f-8ecb3c28f993')
--- a/services/extractor.py
+++ b/services/extractor.py
@ -1,20 +1,44 @@
 import base64
 import binascii
-from typing import Callable, Type
+from typing import Callable, Type, Any, TypeVar
 from langchain_core.runnables import Runnable
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import BaseModel
+
+from utils.parsers import process_profile
+from validation.from_profile import FromProfile


-def run_extraction_chain(
+def extract_profile(client_data: dict[str, Any]):  # Run the LLM extraction chain on the client's "profile" entry and return its result.
+    passport_data = client_data.get("profile")  # NOTE(review): holds the "profile" entry despite the name; None when the key is absent
+
+    prompt_template = (  # placeholders in braces are presumably filled in by __run_extraction_chain -- TODO confirm
+        "Extract the following information from the provided text.\n"
+        "Return only JSON matching this format:\n{format_instructions}\n\n"
+        "Pay special attention to the passport number and signature.\n"  # NOTE(review): passport wording in a profile prompt -- confirm intended
+        "Passport text:\n{processed_text}"
+    )
+
+    result = __run_extraction_chain(  # all arguments by keyword: the callee's parameters are keyword-only
+        raw_file_data=passport_data,
+        file_processor=process_profile,
+        pydantic_model=FromProfile,
+        prompt_template=prompt_template,
+    )
+
+    return result
+
+ModelType = TypeVar("ModelType", bound=BaseModel)
+def __run_extraction_chain(
    *,
    raw_file_data: str,
    file_processor: Callable[[str], str],
-    pydantic_model: Type,
+    pydantic_model: type[ModelType],
    prompt_template: str,
    model_name: str = "gemini-2.0-flash"
-):
+) -> ModelType:
    """
    Traite un fichier encodé en base64, applique un parser OCR, génère un prompt, envoie à un modèle LLM, et retourne le résultat parsé.