import base64


def process_description(desc_b64: str) -> str:
    """Decode a base64-encoded description into UTF-8 text.

    Decoding is best-effort: malformed input does not raise. When the
    input is not valid base64, or the decoded bytes are not valid UTF-8,
    a placeholder string starting with ``[Erreur de décodage]`` followed
    by the underlying error message is returned instead, so callers
    always receive a ``str``.

    :param desc_b64: Base64-encoded data.
    :return: The decoded text, or the error placeholder string on failure.
    """
    try:
        return base64.b64decode(desc_b64).decode("utf-8")
    except (ValueError, TypeError) as exc:
        # binascii.Error (invalid base64 / bad padding) and
        # UnicodeDecodeError (payload is not UTF-8) are both ValueError
        # subclasses; TypeError covers non-string input such as None.
        # Anything else is a genuine bug and is allowed to propagate.
        return f"[Erreur de décodage] {exc}"