Files
julius_baer_onboarding/utils/parsers/passport_parser.py

23 lines
649 B
Python
Raw Normal View History

2025-04-13 10:03:32 +02:00
import os
from mistralai import Mistral
import config
# Read the Mistral API key from the project's config module and build a single
# module-level client; process_passport() below uses this `client` for OCR calls.
api_key = config.MISTRAL_API_KEY
client = Mistral(api_key=api_key)
2025-04-12 10:51:07 +02:00
2025-04-13 10:03:32 +02:00
def process_passport(passport_b64: str) -> str:
    """Run OCR on a base64-encoded passport image and return the text as markdown.

    The image is wrapped in a ``data:`` URI (always declared as
    ``image/jpeg``) and submitted to the module-level Mistral ``client``
    using the ``mistral-ocr-latest`` model.

    Args:
        passport_b64: Base64 payload of the passport image, without any
            ``data:`` prefix (the prefix is added here).

    Returns:
        The markdown of every page in the OCR response, with pages
        separated by a blank line.
    """
    # Describe the document as an inline image URL for the OCR request.
    document_payload = {
        "type": "image_url",
        "image_url": f"data:image/jpeg;base64,{passport_b64}",
    }
    result = client.ocr.process(
        model="mistral-ocr-latest",
        document=document_payload,
    )

    # Gather the markdown of each page, then stitch the pages together.
    page_markdowns = [page.markdown for page in result.pages]
    return "\n\n".join(page_markdowns)