get rid of local ocr, using gpt-4o instead
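In short: the passport image no longer goes through a local pytesseract/passporteye OCR pass; the base64 payload is sent straight to GPT-4o as a multimodal message. A minimal sketch of the pattern (model name and prompt wording follow the diff below; the function name is illustrative, not part of this commit):

    from langchain_core.messages import HumanMessage, SystemMessage
    from langchain_openai import ChatOpenAI

    def read_passport(image_b64: str) -> str:
        # Vision-capable model, deterministic output
        llm = ChatOpenAI(model="gpt-4o", temperature=0)
        messages = [
            SystemMessage(content="You are an assistant that reads passports."),
            HumanMessage(content=[
                {"type": "text", "text": "Read this passport and return the fields as JSON."},
                # The base64 image goes to the model directly as a data URL
                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + image_b64}},
            ]),
        ]
        return llm.invoke(messages).content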
@@ -1,13 +1,15 @@
 import base64
 import binascii
-
+import io
+from PIL import Image
+from langchain_core.messages import HumanMessage, SystemMessage
 from typing import Callable, Type, Any, TypeVar
 from langchain_core.runnables import Runnable
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_openai.chat_models import ChatOpenAI
 from pydantic import BaseModel
-from utils.parsers import process_profile, process_account, process_description ,process_passport
+from utils.parsers import process_profile, process_passport, process_account, process_description
 from validation.from_account import FromAccount
 from validation.from_passport import FromPassport
 from validation.from_profile import FromProfile
@@ -54,23 +56,43 @@ def extract_account(client_data: dict[str, Any])-> FromAccount:
 
 
 def extract_passport(client_data: dict[str, Any]) -> FromPassport:
-    passport_data = client_data.get("passport")
+    raw_file_data = client_data.get("passport")
 
-    prompt_template = (
-        "Extract the following information from the provided passport text.\n"
-        "Return only JSON matching this format:\n{format_instructions}\n\n"
-        "Pay special attention to the passport number\n"
-        "Passport text:\n{processed_text}"
-    )
-
-    result = __run_extraction_chain(
-        raw_file_data=passport_data,
-        file_processor=process_passport,
-        pydantic_model=FromPassport,
-        prompt_template=prompt_template,
-    )
-
-    return result
+    if not raw_file_data:
+        raise ValueError("Missing passport base64 data")
+
+    try:
+        base64.b64decode(raw_file_data, validate=True)
+    except binascii.Error as e:
+        raise ValueError(f"Invalid base64 data: {e}")
+
+    # Decode the image
+    image_bytes = base64.b64decode(raw_file_data)
+    image = Image.open(io.BytesIO(image_bytes))
+
+    # Pydantic parser
+    parser = PydanticOutputParser(pydantic_object=FromPassport)
+    format_instructions = parser.get_format_instructions()
+
+    # GPT-4o LLM
+    llm = ChatOpenAI(model="gpt-4o", temperature=0)
+
+    # Multimodal messages
+    messages = [
+        SystemMessage(content="You are an assistant that reads passports."),
+        HumanMessage(
+            content=[
+                {"type": "text", "text": f"Read this passport and return the following information as JSON:\n{format_instructions}"},
+                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + raw_file_data}},
+            ]
+        ),
+    ]
+
+    # Call the LLM directly (no prompt chain)
+    result = llm.invoke(messages)
+
+    # Structured parsing
+    return parser.parse(result.content)
 
 
 def extract_profile(client_data: dict[str, Any]) -> FromProfile:
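The new guard works because base64.b64decode(..., validate=True) raises binascii.Error for any character outside the base64 alphabet (without validate=True such characters are silently discarded). A standalone check, with an illustrative invalid input:

    import base64
    import binascii

    try:
        base64.b64decode("not@valid!", validate=True)
    except binascii.Error as e:
        # e.g. "Only base64 data is allowed"
        print(f"Invalid base64 data: {e}")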
@@ -1,42 +1,39 @@
 import base64
 import io
-from tempfile import NamedTemporaryFile
-from PIL import Image, ImageEnhance
-import pytesseract
-from passporteye import read_mrz
-import json
+from PIL import Image
+from langchain_openai import ChatOpenAI
+from langchain.output_parsers import PydanticOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from validation.from_passport import FromPassport
 
 
 def process_passport(passport_b64: str) -> str:
     """
     Processes the passport:
     - Decodes the base64 image.
-    - Runs OCR to extract the text.
-    :param passport_b64: Base64 string of the passport image.
-    :return: The text extracted from the image.
+    - Sends it to GPT-4o with a structured extraction prompt.
+    - Parses the result against a Pydantic model.
     """
     image_bytes = base64.b64decode(passport_b64)
-    with NamedTemporaryFile(mode="wb") as tmp_img:
-        tmp_img.write(image_bytes)
-        with open(tmp_img.name, "rb") as read_img:
-            mrz_obj = read_mrz(read_img)
-
     image = Image.open(io.BytesIO(image_bytes))
-    enhancer = ImageEnhance.Contrast(image)
-    image = enhancer.enhance(2.0)  # 2.0 = doubles the contrast (1.0 = unchanged)
-    tesseract_text = pytesseract.image_to_string(image, lang='eng')
-    out_dict = {}
-    if not mrz_obj is None:
-        number_raw = str(mrz_obj.number)
-        # It's not called a 'Hack'athon for nothing...
-        number = number_raw.replace("B", "8")
-        out_dict = {
-            "country": mrz_obj.country,
-            "names": mrz_obj.names,
-            "number": number,
-            "surname": mrz_obj.surname,
-            "mrz": mrz_obj.aux["text"],
-        }
-    out_dict["raw"] = tesseract_text
-    out = json.dumps(out_dict)
-    return out
+
+    # Structured parser based on the FromPassport model
+    parser = PydanticOutputParser(pydantic_object=FromPassport)
+
+    # Prompt + instructions for the extraction
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a passport data extraction assistant."),
+        ("human", "Here is a passport image. Extract the information in this format:\n\n{format_instructions}"),
+    ])
+
+    # Vision-capable LLM (GPT-4o)
+    llm = ChatOpenAI(model="gpt-4o", temperature=0)
+
+    chain = prompt | llm.with_structured_output(parser=parser)
+
+    # Call the LLM with the image in context
+    result = chain.invoke({
+        "format_instructions": parser.get_format_instructions(),
+        "image": image,
+    })
+
+    return result.json()
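For comparison, langchain-openai can also bind the Pydantic schema directly: ChatOpenAI.with_structured_output accepts the model class itself, so no format-instruction string or output parser is needed, and the image can be attached to the message as a data URL. A minimal sketch under those assumptions (the helper name is hypothetical, not part of this commit):

    from langchain_core.messages import HumanMessage
    from langchain_openai import ChatOpenAI
    from validation.from_passport import FromPassport

    def extract_passport_fields(passport_b64: str) -> FromPassport:
        # Binding the schema makes invoke() return a FromPassport instance
        llm = ChatOpenAI(model="gpt-4o", temperature=0).with_structured_output(FromPassport)
        message = HumanMessage(content=[
            {"type": "text", "text": "Extract the passport fields from this image."},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + passport_b64}},
        ])
        return llm.invoke([message])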