From 13b7419db8780aee32f43163d47859ac4ac37ce2 Mon Sep 17 00:00:00 2001 From: dylan <12473240+dlmw@users.noreply.github.com> Date: Sat, 12 Apr 2025 16:58:41 +0200 Subject: [PATCH 1/6] Rename to __run_extraction_chain and create extract_profile --- app.py | 38 +++++++------------------------------- services/extractor.py | 32 ++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/app.py b/app.py index f29ad56..1c1cbbb 100644 --- a/app.py +++ b/app.py @@ -1,12 +1,10 @@ -from dto.requests import GameStartRequestDTO -from services.extractor import run_extraction_chain -from services.julius_baer_api_client import JuliusBaerApiClient -from validation.from_passport import FromPassport - -from services.player import Player -from utils.parsers import process_passport from flask import Flask + import config +from dto.requests import GameStartRequestDTO +from services.extractor import extract_profile +from services.julius_baer_api_client import JuliusBaerApiClient +from services.player import Player app = Flask(__name__) @@ -15,37 +13,15 @@ app = Flask(__name__) def hello_world(): return 'Hello World!' 
+ if __name__ == '__main__': jb_client = JuliusBaerApiClient() game_start_request = GameStartRequestDTO(player_name=config.API_TEAM) res = jb_client.start_game(game_start_request) - - passport_data = res.client_data.get("passport") - - prompt_template = ( - "Extract the following information from the provided passport text.\n" - "Return only JSON matching this format:\n{format_instructions}\n\n" - "Pay special attention to the passport number and signature.\n" - "Passport text:\n{processed_text}" - ) - - result = run_extraction_chain( - raw_file_data=passport_data, - file_processor=process_passport, - pydantic_model=FromPassport, - prompt_template=prompt_template, - ) - - print(result) + result = extract_profile(res.client_data) player = Player() player.play_on_separate_thread() app.run() - - # res.session_id - # UUID('fde19363-a3d5-432e-8b87-54a6dd54f0dd') - # second test UUID('e3d58302-400a-4bc6-9772-ae50de43c9f4') - # UUID('f8b2a0a6-d4e0-45e6-900f-8ecb3c28f993') - # UUID('f8b2a0a6-d4e0-45e6-900f-8ecb3c28f993') \ No newline at end of file diff --git a/services/extractor.py b/services/extractor.py index 72784b1..9e777a7 100644 --- a/services/extractor.py +++ b/services/extractor.py @@ -1,20 +1,44 @@ import base64 import binascii -from typing import Callable, Type +from typing import Callable, Type, Any, TypeVar from langchain_core.runnables import Runnable from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import PydanticOutputParser from langchain_google_genai import ChatGoogleGenerativeAI +from pydantic import BaseModel + +from utils.parsers import process_profile +from validation.from_profile import FromProfile -def run_extraction_chain( +def extract_profile(client_data: dict[str, Any]): + passport_data = client_data.get("profile") + + prompt_template = ( + "Extract the following information from the provided text.\n" + "Return only JSON matching this format:\n{format_instructions}\n\n" + "Pay special attention to the 
passport number and signature.\n" + "Passport text:\n{processed_text}" + ) + + result = __run_extraction_chain( + raw_file_data=passport_data, + file_processor=process_profile, + pydantic_model=FromProfile, + prompt_template=prompt_template, + ) + + return result + +ModelType = TypeVar("ModelType", bound=BaseModel) +def __run_extraction_chain( *, raw_file_data: str, file_processor: Callable[[str], str], - pydantic_model: Type, + pydantic_model: type[ModelType], prompt_template: str, model_name: str = "gemini-2.0-flash" -): +) -> ModelType: """ Traite un fichier encodé en base64, applique un parser OCR, génère un prompt, envoie à un modèle LLM, et retourne le résultat parsé. From bc3ba59dd67aaef4ada271c49e0dfc6f8980d632 Mon Sep 17 00:00:00 2001 From: dylan <12473240+dlmw@users.noreply.github.com> Date: Sat, 12 Apr 2025 16:59:43 +0200 Subject: [PATCH 2/6] Rename to __run_extraction_chain and create extract_profile --- app.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index 8e4c37d..4b396eb 100644 --- a/app.py +++ b/app.py @@ -1,12 +1,12 @@ -from dto.requests import GameStartRequestDTO -from services.extractor import run_extraction_chain -from services.julius_baer_api_client import JuliusBaerApiClient -from validation.from_passport import FromPassport +import logging -from services.player import Player -from utils.parsers import process_passport from flask import Flask + import config +from dto.requests import GameStartRequestDTO +from services.extractor import extract_profile +from services.julius_baer_api_client import JuliusBaerApiClient +from services.player import Player app = Flask(__name__) logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(module)s] - %(message)s') From 0d39c00d45dbe06f1ad3db6c55813c9e52b2d4e9 Mon Sep 17 00:00:00 2001 From: Nitwix Date: Sat, 12 Apr 2025 17:05:07 +0200 Subject: [PATCH 3/6] Add decision log entries --- resources/decision_log2.csv | 18 
++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/resources/decision_log2.csv b/resources/decision_log2.csv index 3bd6296..e3a892e 100644 --- a/resources/decision_log2.csv +++ b/resources/decision_log2.csv @@ -8,3 +8,21 @@ a482b28b-6f4c-4f2d-a167-42c071a92470,Accept 2d7d0293-291c-46ec-91fb-75351be347f8,Accept 3006006d-1060-4b53-afd3-b702a8fc2358,Accept f4a42e3e-75a8-43dc-92fe-e38fe23b1d82,Accept +a5e06a84-2b05-47d1-8149-649d7a9e8bb6,Accept +36adf081-fef6-4696-81da-e6ba73a6c8a0,Accept +154b3c9d-a2e0-4d40-bb39-f3a249b26bc2,Accept +71fdff4e-466a-40c8-a944-4958be13f974,Accept +7b20c9c6-1bd6-4675-9e46-9b9829e50252,Accept +1591ebcd-d0c2-44c7-b130-72f9a15c8a35,Accept +10fd6524-a2a0-4ecf-953e-6c758f7147dc,Accept +44418b3d-e2cd-4105-a599-5165c00c4971,Accept +06dcf9d6-3ec4-451c-9bf4-75cb9c8061ee,Accept +d1d0eb32-9f99-422c-a404-606b4d5c3a10,Accept +3efa6b4e-8a4b-43d8-b570-0d02dd28b5ee,Accept +42d0b5e4-03ab-4199-a74b-2f3fbe68680a,Accept +1ad14242-15a9-4142-bc1f-c3fdd1165021,Accept +e11b296a-5278-49de-a6b1-01aa31a508c4,Accept +ad6b6980-7b2e-4af9-bda0-2ec004574211,Accept +ac62e33d-6645-4360-8e14-21bfb0b6902a,Accept +d5789f9c-a0f4-4663-9c6e-0d416bbbffb8,Accept +25a429d6-bd8a-45d2-af1d-a0b8e5ec2e72,Accept From 222b3102dcd46d9080fd93410b9ae9b96fe8375f Mon Sep 17 00:00:00 2001 From: dylan <12473240+dlmw@users.noreply.github.com> Date: Sat, 12 Apr 2025 17:07:09 +0200 Subject: [PATCH 4/6] Add type to function --- services/extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/extractor.py b/services/extractor.py index 9e777a7..3545b61 100644 --- a/services/extractor.py +++ b/services/extractor.py @@ -11,7 +11,7 @@ from utils.parsers import process_profile from validation.from_profile import FromProfile -def extract_profile(client_data: dict[str, Any]): +def extract_profile(client_data: dict[str, Any]) -> FromProfile: passport_data = client_data.get("profile") prompt_template = ( From f483877303aa5b0d7a745bba7be07e610596ad01 Mon Sep 17 
00:00:00 2001 From: robinrolle Date: Sat, 12 Apr 2025 17:15:43 +0200 Subject: [PATCH 5/6] add passport extractor --- services/extractor.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/services/extractor.py b/services/extractor.py index 3545b61..2cd3684 100644 --- a/services/extractor.py +++ b/services/extractor.py @@ -7,11 +7,32 @@ from langchain_core.output_parsers import PydanticOutputParser from langchain_google_genai import ChatGoogleGenerativeAI from pydantic import BaseModel -from utils.parsers import process_profile +from utils.parsers import process_profile, process_passport +from validation.from_passport import FromPassport from validation.from_profile import FromProfile -def extract_profile(client_data: dict[str, Any]) -> FromProfile: +def extract_passport(client_data: dict[str, Any]) -> FromPassport: + passport_data = client_data.get("passport") + + prompt_template = ( + "Extract the following information from the provided passport text.\n" + "Return only JSON matching this format:\n{format_instructions}\n\n" + "Pay special attention to the passport number\n" + "Passport text:\n{processed_text}" + ) + + result = __run_extraction_chain( + raw_file_data=passport_data, + file_processor=process_passport, + pydantic_model=FromPassport, + prompt_template=prompt_template, + ) + + return result + + +def extract_profile(client_data: dict[str, Any]) -> FromProfile: passport_data = client_data.get("profile") prompt_template = ( From 9985b27d5432d19a0b85fec626420feb6919c94c Mon Sep 17 00:00:00 2001 From: dylan <12473240+dlmw@users.noreply.github.com> Date: Sat, 12 Apr 2025 17:28:55 +0200 Subject: [PATCH 6/6] Add GOOGLE_API_KEY in .env.example --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 2260d4f..2e91728 100644 --- a/.env.example +++ b/.env.example @@ -2,4 +2,4 @@ API_URI= API_KEY= API_TEAM= GAME_FILES_DIR=/project_absolute_path/game_files 
-GROQ_API_KEY=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 \ No newline at end of file +GOOGLE_API_KEY= \ No newline at end of file