@ -44,5 +44,3 @@ langchain-openai
|
|||||||
numpy==2.2.4
|
numpy==2.2.4
|
||||||
pymupdf == 1.25.5
|
pymupdf == 1.25.5
|
||||||
flask-cors==5.0.1
|
flask-cors==5.0.1
|
||||||
mistralai==1.6.0
|
|
||||||
|
|
||||||
|
@ -1,22 +1,42 @@
|
|||||||
import os
|
import base64
|
||||||
from mistralai import Mistral
|
import io
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
api_key = "XEOc110BYE4PMj8FQBauxxGZTitRTs2w"
|
from PIL import Image, ImageEnhance
|
||||||
client = Mistral(api_key=api_key)
|
import pytesseract
|
||||||
|
from passporteye import read_mrz
|
||||||
|
import json
|
||||||
|
|
||||||
def process_passport(passport_b64: str) -> str:
    """Extract passport data from a base64-encoded image and return it as JSON.

    The decoded image is processed in two independent passes:

    - ``read_mrz`` (PassportEye) is run on a temporary copy of the image.
    - Tesseract OCR (``pytesseract.image_to_string``, English) is run on a
      copy whose contrast has been doubled.

    :param passport_b64: Base64 string representing the passport image.
    :return: A JSON object serialized as a string. It always contains a
        ``"raw"`` key with the Tesseract text. When ``read_mrz`` returns a
        result, it also contains ``"country"``, ``"names"``, ``"number"``,
        ``"surname"`` and ``"mrz"``.
    """
    image_bytes = base64.b64decode(passport_b64)

    # NOTE(review): re-opening a NamedTemporaryFile by name while it is still
    # open is platform-dependent (see the tempfile docs) -- confirm the
    # deployment target if this ever needs to run on Windows.
    with NamedTemporaryFile(mode="wb") as tmp_img:
        tmp_img.write(image_bytes)
        # The write above is buffered: flush so the second handle opened
        # below sees the complete image rather than an empty/truncated file.
        tmp_img.flush()
        with open(tmp_img.name, "rb") as read_img:
            mrz_obj = read_mrz(read_img)

    image = Image.open(io.BytesIO(image_bytes))
    # 2.0 doubles the contrast (1.0 leaves the image unchanged).
    image = ImageEnhance.Contrast(image).enhance(2.0)
    tesseract_text = pytesseract.image_to_string(image, lang="eng")

    out_dict = {}
    if mrz_obj is not None:
        # Hackathon workaround: every "B" in the document number is replaced
        # by "8" (presumably to undo an OCR confusion -- this would also
        # alter numbers that legitimately contain a "B"; verify with owner).
        number = str(mrz_obj.number).replace("B", "8")
        out_dict = {
            "country": mrz_obj.country,
            "names": mrz_obj.names,
            "number": number,
            "surname": mrz_obj.surname,
            "mrz": mrz_obj.aux["text"],
        }

    out_dict["raw"] = tesseract_text
    return json.dumps(out_dict)
|
|
Reference in New Issue
Block a user