add description extraction

This commit is contained in:
robinrolle
2025-04-13 06:56:26 +02:00
parent 49e8e9fa36
commit 9b34b5d4af
3 changed files with 21 additions and 3 deletions

View File

@ -7,7 +7,7 @@ from langchain_core.output_parsers import PydanticOutputParser
from langchain_openai.chat_models import ChatOpenAI
from pydantic import BaseModel
from utils.parsers import process_profile, process_passport, process_account
from utils.parsers import process_profile, process_passport, process_account, process_description
from validation.from_account import FromAccount
from validation.from_passport import FromPassport
from validation.from_profile import FromProfile
@ -23,9 +23,10 @@ def extract_description(client_data: dict[str, Any]) -> FromDescription:
"Passport text:\n{processed_text}"
)
result = __run_extraction_chain(
raw_file_data=passport_data,
file_processor=lambda passport_data: passport_data,
file_processor=process_description,
pydantic_model=FromDescription,
prompt_template=prompt_template,
)

View File

@ -1,9 +1,11 @@
from .passport_parser import process_passport
from .profile_parser import process_profile
from .account_parser import process_account
from .description_parser import process_description
__all__ = [
"process_passport",
"process_profile",
"process_account"
"process_account",
"process_description",
]

View File

@ -0,0 +1,15 @@
import base64
def process_description(desc_b64: str) -> str:
    """
    Decode a base64 string and return the corresponding UTF-8 text.

    Decoding is best-effort: a failure is never raised to the caller. It is
    reported through the returned string instead, which then starts with the
    marker ``[Erreur de décodage]`` followed by the underlying error message.

    :param desc_b64: Base64-encoded data.
    :return: The decoded text, or the error-marker string when ``desc_b64``
        is not valid base64 or the decoded bytes are not valid UTF-8.
    """
    try:
        return base64.b64decode(desc_b64).decode("utf-8")
    except (TypeError, ValueError) as exc:
        # binascii.Error (bad base64) and UnicodeDecodeError (bad UTF-8) are
        # both ValueError subclasses; TypeError covers inputs that are neither
        # str nor bytes-like, such as None.
        return f"[Erreur de décodage] {exc}"