diff --git a/app.py b/app.py index f29ad56..1c1cbbb 100644 --- a/app.py +++ b/app.py @@ -1,12 +1,10 @@ -from dto.requests import GameStartRequestDTO -from services.extractor import run_extraction_chain -from services.julius_baer_api_client import JuliusBaerApiClient -from validation.from_passport import FromPassport - -from services.player import Player -from utils.parsers import process_passport from flask import Flask + import config +from dto.requests import GameStartRequestDTO +from services.extractor import extract_profile +from services.julius_baer_api_client import JuliusBaerApiClient +from services.player import Player app = Flask(__name__) @@ -15,37 +13,15 @@ app = Flask(__name__) def hello_world(): return 'Hello World!' + if __name__ == '__main__': jb_client = JuliusBaerApiClient() game_start_request = GameStartRequestDTO(player_name=config.API_TEAM) res = jb_client.start_game(game_start_request) - - passport_data = res.client_data.get("passport") - - prompt_template = ( - "Extract the following information from the provided passport text.\n" - "Return only JSON matching this format:\n{format_instructions}\n\n" - "Pay special attention to the passport number and signature.\n" - "Passport text:\n{processed_text}" - ) - - result = run_extraction_chain( - raw_file_data=passport_data, - file_processor=process_passport, - pydantic_model=FromPassport, - prompt_template=prompt_template, - ) - - print(result) + result = extract_profile(res.client_data) player = Player() player.play_on_separate_thread() app.run() - - # res.session_id - # UUID('fde19363-a3d5-432e-8b87-54a6dd54f0dd') - # second test UUID('e3d58302-400a-4bc6-9772-ae50de43c9f4') - # UUID('f8b2a0a6-d4e0-45e6-900f-8ecb3c28f993') - # UUID('f8b2a0a6-d4e0-45e6-900f-8ecb3c28f993') \ No newline at end of file diff --git a/services/extractor.py b/services/extractor.py index 72784b1..9e777a7 100644 --- a/services/extractor.py +++ b/services/extractor.py @@ -1,20 +1,44 @@ import base64 import binascii -from typing import Callable, Type +from typing import Callable, Type, Any, TypeVar from langchain_core.runnables import Runnable from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import PydanticOutputParser from langchain_google_genai import ChatGoogleGenerativeAI +from pydantic import BaseModel + +from utils.parsers import process_profile +from validation.from_profile import FromProfile -def run_extraction_chain( +def extract_profile(client_data: dict[str, Any]): + passport_data = client_data.get("profile") + + prompt_template = ( + "Extract the following information from the provided text.\n" + "Return only JSON matching this format:\n{format_instructions}\n\n" + "Pay special attention to the passport number and signature.\n" + "Passport text:\n{processed_text}" + ) + + result = __run_extraction_chain( + raw_file_data=passport_data, + file_processor=process_profile, + pydantic_model=FromProfile, + prompt_template=prompt_template, + ) + + return result + +ModelType = TypeVar("ModelType", bound=BaseModel) +def __run_extraction_chain( *, raw_file_data: str, file_processor: Callable[[str], str], - pydantic_model: Type, + pydantic_model: type[ModelType], prompt_template: str, model_name: str = "gemini-2.0-flash" -): +) -> ModelType: """ Traite un fichier encodé en base64, applique un parser OCR, génère un prompt, envoie à un modèle LLM, et retourne le résultat parsé.