diff --git a/services/extractor.py b/services/extractor.py
index d4e7b00..6752c63 100644
--- a/services/extractor.py
+++ b/services/extractor.py
@@ -1,13 +1,15 @@
 import base64
 import binascii
+import io
+from PIL import Image
+from langchain_core.messages import HumanMessage, SystemMessage
 from typing import Callable, Type, Any, TypeVar
 from langchain_core.runnables import Runnable
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_openai.chat_models import ChatOpenAI
 from pydantic import BaseModel
-
-from utils.parsers import process_profile, process_passport, process_account, process_description
+from utils.parsers import process_profile, process_account, process_description, process_passport
 from validation.from_account import FromAccount
 from validation.from_passport import FromPassport
 from validation.from_profile import FromProfile
@@ -54,23 +56,43 @@ def extract_account(client_data: dict[str, Any])-> FromAccount:
 
 
 def extract_passport(client_data: dict[str, Any]) -> FromPassport:
-    passport_data = client_data.get("passport")
+    raw_file_data = client_data.get("passport")
 
-    prompt_template = (
-        "Extract the following information from the provided passport text.\n"
-        "Return only JSON matching this format:\n{format_instructions}\n\n"
-        "Pay special attention to the passport number\n"
-        "Passport text:\n{processed_text}"
-    )
+    if not raw_file_data:
+        raise ValueError("Missing passport base64 data")
 
-    result = __run_extraction_chain(
-        raw_file_data=passport_data,
-        file_processor=process_passport,
-        pydantic_model=FromPassport,
-        prompt_template=prompt_template,
-    )
+    try:
+        image_bytes = base64.b64decode(raw_file_data, validate=True)
+    except binascii.Error as e:
+        raise ValueError(f"Invalid base64 data: {e}") from e
 
-    return result
+    # Open the decoded image to detect its format for the data URL
+    image = Image.open(io.BytesIO(image_bytes))
+    mime_type = f"image/{(image.format or 'jpeg').lower()}"
+
+    # Pydantic output parser for the FromPassport model
+    parser = PydanticOutputParser(pydantic_object=FromPassport)
+    format_instructions = parser.get_format_instructions()
+
+    # Vision-capable LLM (GPT-4o)
+    llm = ChatOpenAI(model="gpt-4o", temperature=0)
+
+    # Multimodal message: extraction instructions plus the passport image
+    messages = [
+        SystemMessage(content="You are an assistant that reads passports."),
+        HumanMessage(
+            content=[
+                {"type": "text", "text": f"Read this passport and return the following information as JSON:\n{format_instructions}"},
+                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64," + raw_file_data}},
+            ]
+        ),
+    ]
+
+    # Call the LLM directly (no prompt chain)
+    result = llm.invoke(messages)
+
+    # Parse the structured JSON response
+    return parser.parse(result.content)
 
 
 def extract_profile(client_data: dict[str, Any]) -> FromProfile:
diff --git a/utils/parsers/passport_parser.py b/utils/parsers/passport_parser.py
index 372008a..1799da1 100644
--- a/utils/parsers/passport_parser.py
+++ b/utils/parsers/passport_parser.py
@@ -1,42 +1,47 @@
 import base64
 import io
-from tempfile import NamedTemporaryFile
-from PIL import Image, ImageEnhance
-import pytesseract
-from passporteye import read_mrz
-import json
+from PIL import Image
+from langchain_openai import ChatOpenAI
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from validation.from_passport import FromPassport
 
 
 def process_passport(passport_b64: str) -> str:
     """
-    Traite le passport :
-    - Décodage de l'image en base64.
-    - Application de l'OCR pour extraire le texte.
-
-    :param passport_b64: Chaîne base64 représentant l'image du passport.
-    :return: Texte extrait de l'image.
+    Process the passport:
+    - Decode the base64 image.
+    - Send it to GPT-4o with a structured extraction prompt.
+    - Parse the response into a Pydantic model.
+
+    :param passport_b64: Base64 string of the passport image.
+    :return: JSON string with the extracted passport fields.
     """
     image_bytes = base64.b64decode(passport_b64)
-    with NamedTemporaryFile(mode="wb") as tmp_img:
-        tmp_img.write(image_bytes)
-        with open(tmp_img.name, "rb") as read_img:
-            mrz_obj = read_mrz(read_img)
-    image = Image.open(io.BytesIO(image_bytes))
-    enhancer = ImageEnhance.Contrast(image)
-    image = enhancer.enhance(2.0)  # 2.0 = double le contraste (1.0 = inchangé)
-    tesseract_text = pytesseract.image_to_string(image, lang='eng')
-    out_dict = {}
-    if not mrz_obj is None:
-        number_raw = str(mrz_obj.number)
-        # It's not called a 'Hack'athon for nothing...
-        number = number_raw.replace("B", "8")
-        out_dict = {
-            "country": mrz_obj.country,
-            "names": mrz_obj.names,
-            "number": number,
-            "surname": mrz_obj.surname,
-            "mrz": mrz_obj.aux["text"],
-        }
-    out_dict["raw"] = tesseract_text
-    out = json.dumps(out_dict)
-    return out
\ No newline at end of file
+    image = Image.open(io.BytesIO(image_bytes))
+    mime_type = f"image/{(image.format or 'jpeg').lower()}"
+
+    # Structured parser based on the FromPassport model
+    parser = PydanticOutputParser(pydantic_object=FromPassport)
+
+    # Prompt with the extraction instructions and the passport image
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a passport data extraction assistant."),
+        ("human", [
+            {"type": "text", "text": "Here is the image of a passport. Extract the information in this format:\n\n{format_instructions}"},
+            {"type": "image_url", "image_url": {"url": "{image_url}"}},
+        ]),
+    ])
+
+    # Vision-capable LLM (GPT-4o)
+    llm = ChatOpenAI(model="gpt-4o", temperature=0)
+
+    chain = prompt | llm | parser
+
+    # Call the LLM with the image and format instructions
+    result = chain.invoke({
+        "format_instructions": parser.get_format_instructions(),
+        "image_url": f"data:{mime_type};base64,{passport_b64}",
+    })
+
+    return result.json()
\ No newline at end of file