Passport parser in progress

This commit is contained in:
Nitwix
2025-04-12 20:12:14 +02:00
9 changed files with 153 additions and 21 deletions

19
app.py
View File

@ -1,9 +1,10 @@
import logging
from flask import Flask
from flask import Flask, request
from flask_cors import cross_origin
import config
from dto.requests import GameStartRequestDTO
from dto.requests import GameStartRequestDTO, GameDecisionRequestDTO
from services.julius_baer_api_client import JuliusBaerApiClient
app = Flask(__name__)
@ -12,6 +13,7 @@ jb_client = JuliusBaerApiClient()
@app.route('/new-game', methods=['POST'])
@cross_origin() # allow all origins all methods
def new_game():
game_start_request = GameStartRequestDTO(player_name=config.API_TEAM)
res = jb_client.start_game(game_start_request)
@ -19,5 +21,18 @@ def new_game():
return res.model_dump_json()
@app.route('/next', methods=['POST'])
@cross_origin()  # allow all origins all methods, same as /new-game
def next_client():
    """
    Forward the player's decision for the current client to the game API.

    Expects a JSON object body with the keys ``decision``, ``client_id`` and
    ``session_id``.

    :return: JSON dump of the API client's response, or a 400 error payload
        when the request body is not a JSON object.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON body,
    # so the malformed-request case is handled in one place below.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return {"error": "Request body must be a JSON object"}, 400

    make_decision_request = GameDecisionRequestDTO(
        decision=body.get("decision"),
        client_id=body.get("client_id"),
        session_id=body.get("session_id"),
    )
    res = jb_client.send_decision(make_decision_request)
    return res.model_dump_json()
# Start the Flask application only when this module is executed directly
# (not when it is imported).
if __name__ == '__main__':
    app.run()

View File

@ -6,7 +6,26 @@
<title>PDF Viewer</title>
<script type="module" src="./js/main.js"></script>
</head>
<body class="p-4">
<body x-data="gameManager" class="p-4">
<template x-if="isLoading">
<div class="alert alert-info" role="alert">
Loading game data...
</div>
</template>
<template x-if="error">
<div class="alert alert-danger" role="alert" x-text="error"></div>
</template>
<template x-if="gameData && !isLoading && !error">
<div>
<p>Game Ready!</p>
<p>Session ID: <code x-text="gameData.session_id"></code></p>
<p>First Client ID: <code x-text="gameData.client_id"></code></p>
</div>
</template>
<div class="container py-4 px-3 mx-auto">
<h1>Hello, Bootstrap and Vite!</h1>
<button class="btn btn-primary">Primary button</button>

View File

@ -7,6 +7,64 @@ import Alpine from 'alpinejs'
window.Alpine = Alpine
// Define an Alpine component to manage the game state
Alpine.data('gameManager', () => ({
    // --- Component State ---
    isLoading: false, // true while the /new-game request is in flight
    error: null,      // message of the last failure, or null when there is none
    gameData: null,   // parsed JSON body returned by /new-game, or null

    // Start a game as soon as the component is initialised.
    init() {
        console.log('Game manager initializing...');
        this.startNewGame();
    },

    // Request a new game from the backend and store the result in gameData.
    // On failure, error holds a message and gameData stays null.
    startNewGame() {
        this.isLoading = true;
        this.error = null;
        this.gameData = null;

        // Use the browser's fetch API
        fetch('http://127.0.0.1:5000/new-game', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
        })
            .then(response => {
                // Successful request (status code 2xx): parse the JSON body
                if (response.ok) {
                    return response.json();
                }

                const fallbackMessage = `HTTP error! status: ${response.status}`;
                // Recover from an unparsable body FIRST, then throw. Attaching
                // the catch after the throwing handler would swallow the
                // detailed error and always report the generic fallback.
                return response.json()
                    .catch(() => null)
                    .then(errData => {
                        throw new Error((errData && errData.detail) || fallbackMessage);
                    });
            })
            .then(data => {
                // Success! Store the received game data
                console.log('New game data received:', data);
                this.gameData = data; // e.g., { message, session_id, client_id, ... }
            })
            .catch(error => {
                // Handle any errors during fetch or processing
                console.error('Error starting new game:', error);
                this.error = error.message || 'Failed to start game. Check console/backend.';
            })
            .finally(() => {
                // Runs regardless of success or failure: turn off the loading indicator
                this.isLoading = false;
            });
    }

    // submitDecision(decision) { ... }
}));
Alpine.data('pdfViewer', () => ({
pdfUrl: null,

View File

@ -40,4 +40,6 @@ Flask==3.1.0
annotated-types==0.7.0
blinker==1.9.0
langchain-google-genai==2.1.2
numpy==2.2.4
numpy==2.2.4
pymupdf==1.25.5
flask-cors==5.0.1

View File

@ -26,3 +26,10 @@ ad6b6980-7b2e-4af9-bda0-2ec004574211,Accept
ac62e33d-6645-4360-8e14-21bfb0b6902a,Accept
d5789f9c-a0f4-4663-9c6e-0d416bbbffb8,Accept
25a429d6-bd8a-45d2-af1d-a0b8e5ec2e72,Accept
e592968f-4970-4aad-82d5-9f9383e3ae57,Accept
ef70356d-5014-498e-9897-643b6bc88dab,Accept
87db27e8-e1ff-4915-8dd2-2dc4afb570a4,Accept
161cffea-a125-4496-a0fc-17d21e2ae512,Accept
aaa05711-5e4b-455e-9971-fdba386441e9,Accept
3709c954-6c2e-42a5-90f7-8b73c374943f,Accept
f3054c1b-ad4a-4cad-be7b-03cb192f5c45,Accept

1 client_id decision
26 ac62e33d-6645-4360-8e14-21bfb0b6902a Accept
27 d5789f9c-a0f4-4663-9c6e-0d416bbbffb8 Accept
28 25a429d6-bd8a-45d2-af1d-a0b8e5ec2e72 Accept
29 e592968f-4970-4aad-82d5-9f9383e3ae57 Accept
30 ef70356d-5014-498e-9897-643b6bc88dab Accept
31 87db27e8-e1ff-4915-8dd2-2dc4afb570a4 Accept
32 161cffea-a125-4496-a0fc-17d21e2ae512 Accept
33 aaa05711-5e4b-455e-9971-fdba386441e9 Accept
34 3709c954-6c2e-42a5-90f7-8b73c374943f Accept
35 f3054c1b-ad4a-4cad-be7b-03cb192f5c45 Accept

View File

@ -113,8 +113,8 @@ def dummy_data() -> ExtractedData:
def dummy_client_data() -> dict[str, Any]:
# TODO make generic
resp_path = f"{config.GAME_FILES_DIR}/65e6ec83-88b1-4d1f-8560-a1418803348b/000000_decision_accept_active/000000_response.json"
resp_path = f"{config.GAME_FILES_DIR}/fc3b1f5a-296d-4cd0-a560-cfa5a6f8d302/000000_decision_accept_active/000000_response.json"
out = {}
with open(resp_path, "r") as file:
out = json.loads(str(json.loads(file.read())))
out = json.loads(file.read())["client_data"]
return out

7
tests/test_parser.py Normal file
View File

@ -0,0 +1,7 @@
from tests.dummy import dummy_client_data
from utils.parsers.passport_parser import process_passport
def test_passport_parser() -> None:
    """Smoke test: run the passport parser on the dummy client's passport entry."""
    passport_b64 = dummy_client_data().get("passport")
    process_passport(passport_b64)

View File

@ -1,22 +1,38 @@
import base64
from pdf2image import convert_from_bytes
import pytesseract
import pymupdf
def process_account(account_b64: str) -> str:
    """
    Process the account document:
    - Decode the base64-encoded PDF.
    - Extract the text and the form fields of every page directly from the PDF.

    :param account_b64: Base64 string representing the PDF.
    :return: Text extracted from each page of the PDF, including the form
        fields; one ``--- Page N ---`` section per page, empty string for a
        PDF without pages.
    """
    # Decode the base64 payload into the raw PDF bytes
    pdf_bytes = base64.b64decode(account_b64)

    pages_text = []
    # The context manager closes the document even if extraction raises
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            page_text = f"--- Page {page_number} ---\n{page.get_text()}"

            # Form widgets are collected separately from the page text
            form_fields_text = [
                f"Field: {field.field_name}, Value: {field.field_value}"
                for field in page.widgets()
            ]
            if form_fields_text:
                page_text += "\n\nForm Fields:\n" + "\n".join(form_fields_text)

            # Accumulate per page so earlier pages are not overwritten
            pages_text.append(page_text)

    return "\n".join(pages_text)

View File

@ -1,8 +1,10 @@
import base64
import io
from tempfile import NamedTemporaryFile
from PIL import Image
import pytesseract
from passporteye import read_mrz
import json
def process_passport(passport_b64: str) -> str:
    """
    Process the passport:
    - Decode the base64-encoded image.
    - Run the MRZ reader (``passporteye.read_mrz``) on the decoded image.

    :param passport_b64: Base64 string representing the passport image.
    :return: Result of ``read_mrz`` on the image.
        NOTE(review): the result is returned as-is and is not yet converted
        to ``str`` despite the annotation; that conversion is still TODO.
    """
    image_bytes = base64.b64decode(passport_b64)

    with NamedTemporaryFile(mode="wb") as tmp_img:
        tmp_img.write(image_bytes)
        # Push the buffered bytes to disk before the file is reopened by name;
        # otherwise the reader below may see an empty or truncated image.
        tmp_img.flush()
        with open(tmp_img.name, "rb") as read_img:
            mrz = read_mrz(read_img)

    # TODO CONTINUE
    return mrz