Add dumb Mistral example

This commit is contained in:
robinrolle
2025-04-13 08:03:45 +02:00
parent 804b47145f
commit d87c8f948d
2 changed files with 20 additions and 38 deletions

View File

@ -44,3 +44,5 @@ langchain-openai
numpy==2.2.4 numpy==2.2.4
pymupdf == 1.25.5 pymupdf == 1.25.5
flask-cors==5.0.1 flask-cors==5.0.1
mistralai==1.6.0

View File

@ -1,42 +1,22 @@
import base64 import os
import io from mistralai import Mistral
from tempfile import NamedTemporaryFile
from PIL import Image, ImageEnhance api_key = "XEOc110BYE4PMj8FQBauxxGZTitRTs2w"
import pytesseract client = Mistral(api_key=api_key)
from passporteye import read_mrz
import json
def process_passport(passport_b64: str) -> str: def process_passport(passport_b64: str) -> str:
""" mistral_image_url = f"data:image/png;base64,{passport_b64}"
Traite le passport :
- Décodage de l'image en base64.
- Application de l'OCR pour extraire le texte.
:param passport_b64: Chaîne base64 représentant l'image du passport. ocr_response = client.ocr.process(
:return: Texte extrait de l'image. model="mistral-ocr-latest",
""" document={
image_bytes = base64.b64decode(passport_b64) "type": "image_url",
with NamedTemporaryFile(mode="wb") as tmp_img: "image_url": mistral_image_url
tmp_img.write(image_bytes)
with open(tmp_img.name, "rb") as read_img:
mrz_obj = read_mrz(read_img)
image = Image.open(io.BytesIO(image_bytes))
enhancer = ImageEnhance.Contrast(image)
image = enhancer.enhance(2.0) # 2.0 = double le contraste (1.0 = inchangé)
tesseract_text = pytesseract.image_to_string(image, lang='eng')
out_dict = {}
if not mrz_obj is None:
number_raw = str(mrz_obj.number)
# It's not called a 'Hack'athon for nothing...
number = number_raw.replace("B", "8")
out_dict = {
"country": mrz_obj.country,
"names": mrz_obj.names,
"number": number,
"surname": mrz_obj.surname,
"mrz": mrz_obj.aux["text"],
} }
out_dict["raw"] = tesseract_text )
out = json.dumps(out_dict)
return out # Extraire le markdown de toutes les pages
markdown_text = "\n\n".join(page.markdown for page in ocr_response.pages)
return markdown_text # Tu peux aussi retourner juste le chemin si tu préfères