Rename to __run_extraction_chain and create extract_profile

dylan
2025-04-12 16:58:41 +02:00
parent 15cfd2f9b6
commit 13b7419db8
2 changed files with 35 additions and 35 deletions

@@ -1,20 +1,44 @@
 import base64
 import binascii
-from typing import Callable, Type
+from typing import Callable, Type, Any, TypeVar
 from langchain_core.runnables import Runnable
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_google_genai import ChatGoogleGenerativeAI
 from pydantic import BaseModel
 from utils.parsers import process_profile
 from validation.from_profile import FromProfile
-def run_extraction_chain(
+def extract_profile(client_data: dict[str, Any]):
+    passport_data = client_data.get("profile")
+    prompt_template = (
+        "Extract the following information from the provided text.\n"
+        "Return only JSON matching this format:\n{format_instructions}\n\n"
+        "Pay special attention to the passport number and signature.\n"
+        "Passport text:\n{processed_text}"
+    )
+    result = __run_extraction_chain(
+        raw_file_data=passport_data,
+        file_processor=process_profile,
+        pydantic_model=FromProfile,
+        prompt_template=prompt_template,
+    )
+    return result
+ModelType = TypeVar("ModelType", bound=BaseModel)
+def __run_extraction_chain(
     *,
     raw_file_data: str,
     file_processor: Callable[[str], str],
-    pydantic_model: Type,
+    pydantic_model: type[ModelType],
     prompt_template: str,
     model_name: str = "gemini-2.0-flash"
-):
+) -> ModelType:
"""
Traite un fichier encodé en base64, applique un parser OCR, génère un prompt, envoie à un modèle LLM, et retourne le résultat parsé.
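
The body of __run_extraction_chain lies outside this hunk, so the sketch below only illustrates how such a helper is commonly composed with LangChain, inferred from the module's imports and the docstring. FromProfileStub, run_chain_sketch, and every implementation detail here are assumptions for illustration, not the committed code.

import base64
import binascii
from typing import Callable

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel


class FromProfileStub(BaseModel):
    # Hypothetical stand-in for validation.from_profile.FromProfile.
    passport_number: str
    signature_present: bool


def run_chain_sketch(
    *,
    raw_file_data: str,
    file_processor: Callable[[str], str],
    pydantic_model: type[BaseModel],
    prompt_template: str,
    model_name: str = "gemini-2.0-flash",
) -> BaseModel:
    # Reject payloads that are not valid base64 before doing any work;
    # binascii.Error is what b64decode raises on malformed input.
    try:
        base64.b64decode(raw_file_data, validate=True)
    except binascii.Error as exc:
        raise ValueError("raw_file_data is not valid base64") from exc

    # Delegate OCR / text extraction to the caller-supplied processor.
    processed_text = file_processor(raw_file_data)

    # Bind the Pydantic schema to an output parser, build the prompt,
    # and compose prompt -> Gemini -> parser as an LCEL chain.
    parser = PydanticOutputParser(pydantic_object=pydantic_model)
    prompt = ChatPromptTemplate.from_template(prompt_template)
    llm = ChatGoogleGenerativeAI(model=model_name)
    chain = prompt | llm | parser

    return chain.invoke({
        "format_instructions": parser.get_format_instructions(),
        "processed_text": processed_text,
    })

In the committed version, extract_profile fills these parameters with process_profile and FromProfile, so callers only pass the client_data dict and get a FromProfile instance back.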