Passport parser in progress
This commit is contained in:
19
app.py
19
app.py
@ -1,9 +1,10 @@
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from flask import Flask
|
from flask import Flask, request
|
||||||
|
from flask_cors import cross_origin
|
||||||
|
|
||||||
import config
|
import config
|
||||||
from dto.requests import GameStartRequestDTO
|
from dto.requests import GameStartRequestDTO, GameDecisionRequestDTO
|
||||||
from services.julius_baer_api_client import JuliusBaerApiClient
|
from services.julius_baer_api_client import JuliusBaerApiClient
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
@ -12,6 +13,7 @@ jb_client = JuliusBaerApiClient()
|
|||||||
|
|
||||||
|
|
||||||
@app.route('/new-game', methods=['POST'])
|
@app.route('/new-game', methods=['POST'])
|
||||||
|
@cross_origin() # allow all origins all methods
|
||||||
def new_game():
|
def new_game():
|
||||||
game_start_request = GameStartRequestDTO(player_name=config.API_TEAM)
|
game_start_request = GameStartRequestDTO(player_name=config.API_TEAM)
|
||||||
res = jb_client.start_game(game_start_request)
|
res = jb_client.start_game(game_start_request)
|
||||||
@ -19,5 +21,18 @@ def new_game():
|
|||||||
return res.model_dump_json()
|
return res.model_dump_json()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/next', methods=['POST'])
|
||||||
|
def next_client():
|
||||||
|
body = request.get_json()
|
||||||
|
|
||||||
|
decision = body.get("decision")
|
||||||
|
client_id = body.get("client_id")
|
||||||
|
session_id = body.get("session_id")
|
||||||
|
|
||||||
|
make_decision_request = GameDecisionRequestDTO(decision=decision, client_id=client_id, session_id=session_id)
|
||||||
|
res = jb_client.send_decision(make_decision_request)
|
||||||
|
|
||||||
|
return res.model_dump_json()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run()
|
app.run()
|
||||||
|
@ -6,7 +6,26 @@
|
|||||||
<title>PDF Viewer</title>
|
<title>PDF Viewer</title>
|
||||||
<script type="module" src="./js/main.js"></script>
|
<script type="module" src="./js/main.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body class="p-4">
|
<body x-data="gameManager" class="p-4">
|
||||||
|
|
||||||
|
<template x-if="isLoading">
|
||||||
|
<div class="alert alert-info" role="alert">
|
||||||
|
Loading game data...
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<template x-if="error">
|
||||||
|
<div class="alert alert-danger" role="alert" x-text="error"></div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<template x-if="gameData && !isLoading && !error">
|
||||||
|
<div>
|
||||||
|
<p>Game Ready!</p>
|
||||||
|
<p>Session ID: <code x-text="gameData.session_id"></code></p>
|
||||||
|
<p>First Client ID: <code x-text="gameData.client_id"></code></p>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
<div class="container py-4 px-3 mx-auto">
|
<div class="container py-4 px-3 mx-auto">
|
||||||
<h1>Hello, Bootstrap and Vite!</h1>
|
<h1>Hello, Bootstrap and Vite!</h1>
|
||||||
<button class="btn btn-primary">Primary button</button>
|
<button class="btn btn-primary">Primary button</button>
|
||||||
|
@ -7,6 +7,64 @@ import Alpine from 'alpinejs'
|
|||||||
|
|
||||||
window.Alpine = Alpine
|
window.Alpine = Alpine
|
||||||
|
|
||||||
|
// Define an Alpine component to manage the game state
|
||||||
|
Alpine.data('gameManager', () => ({
|
||||||
|
// --- Component State ---
|
||||||
|
isLoading: false,
|
||||||
|
error: null,
|
||||||
|
gameData: null,
|
||||||
|
|
||||||
|
init() {
|
||||||
|
console.log('Game manager initializing...');
|
||||||
|
this.startNewGame();
|
||||||
|
},
|
||||||
|
|
||||||
|
startNewGame() {
|
||||||
|
this.isLoading = true;
|
||||||
|
this.error = null;
|
||||||
|
this.gameData = null;
|
||||||
|
|
||||||
|
// Use the browser's fetch API
|
||||||
|
fetch('http://127.0.0.1:5000/new-game', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
})
|
||||||
|
.then(response => {
|
||||||
|
// Check if the request was successful (status code 2xx)
|
||||||
|
if (!response.ok) {
|
||||||
|
// If not okay, throw an error to be caught by .catch()
|
||||||
|
// Try to get error details from response body if possible
|
||||||
|
return response.json().then(errData => {
|
||||||
|
throw new Error(errData.detail || `HTTP error! status: ${response.status}`);
|
||||||
|
}).catch(() => {
|
||||||
|
// Fallback if response is not JSON or other error
|
||||||
|
throw new Error(`HTTP error! status: ${response.status}`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// If okay, parse the JSON response body
|
||||||
|
return response.json();
|
||||||
|
})
|
||||||
|
.then(data => {
|
||||||
|
// Success! Store the received game data
|
||||||
|
console.log('New game data received:', data);
|
||||||
|
this.gameData = data; // e.g., { message, session_id, client_id, ... }
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
// Handle any errors during fetch or processing
|
||||||
|
console.error('Error starting new game:', error);
|
||||||
|
this.error = error.message || 'Failed to start game. Check console/backend.';
|
||||||
|
})
|
||||||
|
.finally(() => {
|
||||||
|
// This runs regardless of success or failure
|
||||||
|
this.isLoading = false; // Turn off loading indicator
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// submitDecision(decision) { ... }
|
||||||
|
}));
|
||||||
|
|
||||||
Alpine.data('pdfViewer', () => ({
|
Alpine.data('pdfViewer', () => ({
|
||||||
pdfUrl: null,
|
pdfUrl: null,
|
||||||
|
|
||||||
|
@ -40,4 +40,6 @@ Flask==3.1.0
|
|||||||
annotated-types==0.7.0
|
annotated-types==0.7.0
|
||||||
blinker==1.9.0
|
blinker==1.9.0
|
||||||
langchain-google-genai==2.1.2
|
langchain-google-genai==2.1.2
|
||||||
numpy==2.2.4
|
numpy==2.2.4
|
||||||
|
pymupdf == 1.25.5
|
||||||
|
flask-cors==5.0.1
|
||||||
|
@ -26,3 +26,10 @@ ad6b6980-7b2e-4af9-bda0-2ec004574211,Accept
|
|||||||
ac62e33d-6645-4360-8e14-21bfb0b6902a,Accept
|
ac62e33d-6645-4360-8e14-21bfb0b6902a,Accept
|
||||||
d5789f9c-a0f4-4663-9c6e-0d416bbbffb8,Accept
|
d5789f9c-a0f4-4663-9c6e-0d416bbbffb8,Accept
|
||||||
25a429d6-bd8a-45d2-af1d-a0b8e5ec2e72,Accept
|
25a429d6-bd8a-45d2-af1d-a0b8e5ec2e72,Accept
|
||||||
|
e592968f-4970-4aad-82d5-9f9383e3ae57,Accept
|
||||||
|
ef70356d-5014-498e-9897-643b6bc88dab,Accept
|
||||||
|
87db27e8-e1ff-4915-8dd2-2dc4afb570a4,Accept
|
||||||
|
161cffea-a125-4496-a0fc-17d21e2ae512,Accept
|
||||||
|
aaa05711-5e4b-455e-9971-fdba386441e9,Accept
|
||||||
|
3709c954-6c2e-42a5-90f7-8b73c374943f,Accept
|
||||||
|
f3054c1b-ad4a-4cad-be7b-03cb192f5c45,Accept
|
||||||
|
|
@ -113,8 +113,8 @@ def dummy_data() -> ExtractedData:
|
|||||||
|
|
||||||
def dummy_client_data() -> dict[str, Any]:
|
def dummy_client_data() -> dict[str, Any]:
|
||||||
# TODO make generic
|
# TODO make generic
|
||||||
resp_path = f"{config.GAME_FILES_DIR}/65e6ec83-88b1-4d1f-8560-a1418803348b/000000_decision_accept_active/000000_response.json"
|
resp_path = f"{config.GAME_FILES_DIR}/fc3b1f5a-296d-4cd0-a560-cfa5a6f8d302/000000_decision_accept_active/000000_response.json"
|
||||||
out = {}
|
out = {}
|
||||||
with open(resp_path, "r") as file:
|
with open(resp_path, "r") as file:
|
||||||
out = json.loads(str(json.loads(file.read())))
|
out = json.loads(file.read())["client_data"]
|
||||||
return out
|
return out
|
7
tests/test_parser.py
Normal file
7
tests/test_parser.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
from tests.dummy import dummy_client_data
|
||||||
|
from utils.parsers.passport_parser import process_passport
|
||||||
|
|
||||||
|
|
||||||
|
def test_passport_parser() -> None:
|
||||||
|
client_data = dummy_client_data()
|
||||||
|
process_passport(client_data.get("passport"))
|
@ -1,22 +1,38 @@
|
|||||||
import base64
|
import base64
|
||||||
from pdf2image import convert_from_bytes
|
import pymupdf
|
||||||
import pytesseract
|
|
||||||
|
|
||||||
|
|
||||||
def process_account(account_b64: str) -> str:
|
def process_account(account_b64: str) -> str:
|
||||||
"""
|
"""
|
||||||
Traite l'account :
|
Traite l'account :
|
||||||
- Décodage du PDF encodé en base64.
|
- Décodage du PDF encodé en base64.
|
||||||
- Conversion de chaque page du PDF en image.
|
- Extraction du texte et des champs de formulaire directement depuis le PDF.
|
||||||
- Application de l'OCR sur chaque image pour extraire le texte.
|
|
||||||
|
|
||||||
:param account_b64: Chaîne base64 représentant le PDF.
|
:param account_b64: Chaîne base64 représentant le PDF.
|
||||||
:return: Texte extrait de chaque page du PDF.
|
:return: Texte extrait de chaque page du PDF, incluant les champs du formulaire.
|
||||||
"""
|
"""
|
||||||
|
# Décodage du PDF en base64
|
||||||
pdf_bytes = base64.b64decode(account_b64)
|
pdf_bytes = base64.b64decode(account_b64)
|
||||||
images = convert_from_bytes(pdf_bytes)
|
|
||||||
pages_text = []
|
# Ouverture du PDF avec PyMuPDF
|
||||||
for i, image in enumerate(images):
|
pdf_document = pymupdf.open(stream=pdf_bytes, filetype="pdf")
|
||||||
text = pytesseract.image_to_string(image, lang="eng")
|
|
||||||
pages_text.append(f"--- Page {i + 1} ---\n{text}")
|
# Traitement de chaque page
|
||||||
return "\n".join(pages_text)
|
for i in range(len(pdf_document)):
|
||||||
|
page = pdf_document[i]
|
||||||
|
|
||||||
|
# Extraction des champs de formulaire
|
||||||
|
fields = page.widgets()
|
||||||
|
form_fields_text = []
|
||||||
|
|
||||||
|
for field in fields:
|
||||||
|
field_name = field.field_name
|
||||||
|
field_value = field.field_value
|
||||||
|
form_fields_text.append(f"Field: {field_name}, Value: {field_value}")
|
||||||
|
|
||||||
|
combined_text = ""
|
||||||
|
|
||||||
|
if form_fields_text:
|
||||||
|
combined_text += "\n\nForm Fields:\n" + "\n".join(form_fields_text)
|
||||||
|
|
||||||
|
pdf_document.close()
|
||||||
|
return "\n".join(combined_text)
|
@ -1,8 +1,10 @@
|
|||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import pytesseract
|
import pytesseract
|
||||||
|
from passporteye import read_mrz
|
||||||
|
import json
|
||||||
|
|
||||||
def process_passport(passport_b64: str) -> str:
|
def process_passport(passport_b64: str) -> str:
|
||||||
"""
|
"""
|
||||||
@ -14,6 +16,12 @@ def process_passport(passport_b64: str) -> str:
|
|||||||
:return: Texte extrait de l'image.
|
:return: Texte extrait de l'image.
|
||||||
"""
|
"""
|
||||||
image_bytes = base64.b64decode(passport_b64)
|
image_bytes = base64.b64decode(passport_b64)
|
||||||
image = Image.open(io.BytesIO(image_bytes))
|
# image = Image.open(io.BytesIO(image_bytes))
|
||||||
text = pytesseract.image_to_string(image, lang='eng')
|
# text = pytesseract.image_to_string(image, lang='eng')
|
||||||
|
with NamedTemporaryFile(mode="wb") as tmp_img:
|
||||||
|
tmp_img.write(image_bytes)
|
||||||
|
with open(tmp_img.name, "rb") as read_img:
|
||||||
|
text = read_mrz(read_img)
|
||||||
|
# text = json.dumps(text)
|
||||||
|
# TODO CONTINUE
|
||||||
return text
|
return text
|
Reference in New Issue
Block a user