Passport parser in progress
This commit is contained in:
19
app.py
19
app.py
@ -1,9 +1,10 @@
|
||||
import logging
|
||||
|
||||
from flask import Flask
|
||||
from flask import Flask, request
|
||||
from flask_cors import cross_origin
|
||||
|
||||
import config
|
||||
from dto.requests import GameStartRequestDTO
|
||||
from dto.requests import GameStartRequestDTO, GameDecisionRequestDTO
|
||||
from services.julius_baer_api_client import JuliusBaerApiClient
|
||||
|
||||
app = Flask(__name__)
|
||||
@ -12,6 +13,7 @@ jb_client = JuliusBaerApiClient()
|
||||
|
||||
|
||||
@app.route('/new-game', methods=['POST'])
|
||||
@cross_origin() # allow all origins all methods
|
||||
def new_game():
|
||||
game_start_request = GameStartRequestDTO(player_name=config.API_TEAM)
|
||||
res = jb_client.start_game(game_start_request)
|
||||
@ -19,5 +21,18 @@ def new_game():
|
||||
return res.model_dump_json()
|
||||
|
||||
|
||||
@app.route('/next', methods=['POST'])
@cross_origin()  # allow all origins all methods (same policy as /new-game)
def next_client():
    """Forward the player's decision for the current client to the game API.

    Expects a JSON object body with the keys ``decision``, ``client_id`` and
    ``session_id``. The values are wrapped in a ``GameDecisionRequestDTO`` and
    sent through ``jb_client.send_decision``.

    :return: The API response serialized with ``model_dump_json()``, or a
        ``({"detail": ...}, 400)`` tuple when the body is not a JSON object
        or a required key is missing.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the failure can be reported as a 400 with a message.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return {"detail": "Request body must be a JSON object."}, 400

    required_keys = ("decision", "client_id", "session_id")
    missing = [key for key in required_keys if body.get(key) is None]
    if missing:
        return {"detail": f"Missing required field(s): {', '.join(missing)}"}, 400

    make_decision_request = GameDecisionRequestDTO(
        decision=body["decision"],
        client_id=body["client_id"],
        session_id=body["session_id"],
    )
    res = jb_client.send_decision(make_decision_request)

    return res.model_dump_json()
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run()
|
||||
|
@ -6,7 +6,26 @@
|
||||
<title>PDF Viewer</title>
|
||||
<script type="module" src="./js/main.js"></script>
|
||||
</head>
|
||||
<body class="p-4">
|
||||
<body x-data="gameManager" class="p-4">
|
||||
|
||||
<template x-if="isLoading">
|
||||
<div class="alert alert-info" role="alert">
|
||||
Loading game data...
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<template x-if="error">
|
||||
<div class="alert alert-danger" role="alert" x-text="error"></div>
|
||||
</template>
|
||||
|
||||
<template x-if="gameData && !isLoading && !error">
|
||||
<div>
|
||||
<p>Game Ready!</p>
|
||||
<p>Session ID: <code x-text="gameData.session_id"></code></p>
|
||||
<p>First Client ID: <code x-text="gameData.client_id"></code></p>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<div class="container py-4 px-3 mx-auto">
|
||||
<h1>Hello, Bootstrap and Vite!</h1>
|
||||
<button class="btn btn-primary">Primary button</button>
|
||||
|
@ -7,6 +7,64 @@ import Alpine from 'alpinejs'
|
||||
|
||||
window.Alpine = Alpine
|
||||
|
||||
// Define an Alpine component to manage the game state
|
||||
Alpine.data('gameManager', () => ({
    // --- Component State ---
    isLoading: false, // true while the /new-game request is in flight
    error: null,      // error message to display, or null when there is none
    gameData: null,   // parsed JSON body of the last successful /new-game call

    /** Start a new game when the component is initialized. */
    init() {
        console.log('Game manager initializing...');
        this.startNewGame();
    },

    /**
     * Request a new game from the backend and store the result.
     *
     * Resets the component state, POSTs to /new-game, then sets `gameData`
     * on success or `error` on failure. `isLoading` is cleared in both cases.
     *
     * @returns {Promise<void>} Settles once the request has been handled;
     *   failures are reported through `error`, not through a rejection.
     */
    startNewGame() {
        this.isLoading = true;
        this.error = null;
        this.gameData = null;

        // Use the browser's fetch API
        return fetch('http://127.0.0.1:5000/new-game', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
        })
            .then(response => {
                if (!response.ok) {
                    // Recover from a non-JSON error body BEFORE throwing.
                    // Chaining .catch() after the throwing .then() would also
                    // swallow the detailed error and always report the
                    // generic fallback message.
                    return response.json()
                        .catch(() => null)
                        .then(errData => {
                            const detail = errData && errData.detail;
                            throw new Error(detail || `HTTP error! status: ${response.status}`);
                        });
                }
                // If okay, parse the JSON response body
                return response.json();
            })
            .then(data => {
                console.log('New game data received:', data);
                this.gameData = data; // e.g., { message, session_id, client_id, ... }
            })
            .catch(error => {
                // Handle any errors during fetch or processing
                console.error('Error starting new game:', error);
                this.error = error.message || 'Failed to start game. Check console/backend.';
            })
            .finally(() => {
                // Runs regardless of success or failure
                this.isLoading = false;
            });
    },

    // submitDecision(decision) { ... }
}));
|
||||
|
||||
Alpine.data('pdfViewer', () => ({
|
||||
pdfUrl: null,
|
||||
|
||||
|
@ -40,4 +40,6 @@ Flask==3.1.0
|
||||
annotated-types==0.7.0
|
||||
blinker==1.9.0
|
||||
langchain-google-genai==2.1.2
|
||||
numpy==2.2.4
|
||||
numpy==2.2.4
|
||||
pymupdf == 1.25.5
|
||||
flask-cors==5.0.1
|
||||
|
@ -26,3 +26,10 @@ ad6b6980-7b2e-4af9-bda0-2ec004574211,Accept
|
||||
ac62e33d-6645-4360-8e14-21bfb0b6902a,Accept
|
||||
d5789f9c-a0f4-4663-9c6e-0d416bbbffb8,Accept
|
||||
25a429d6-bd8a-45d2-af1d-a0b8e5ec2e72,Accept
|
||||
e592968f-4970-4aad-82d5-9f9383e3ae57,Accept
|
||||
ef70356d-5014-498e-9897-643b6bc88dab,Accept
|
||||
87db27e8-e1ff-4915-8dd2-2dc4afb570a4,Accept
|
||||
161cffea-a125-4496-a0fc-17d21e2ae512,Accept
|
||||
aaa05711-5e4b-455e-9971-fdba386441e9,Accept
|
||||
3709c954-6c2e-42a5-90f7-8b73c374943f,Accept
|
||||
f3054c1b-ad4a-4cad-be7b-03cb192f5c45,Accept
|
||||
|
|
@ -113,8 +113,8 @@ def dummy_data() -> ExtractedData:
|
||||
|
||||
def dummy_client_data() -> dict[str, Any]:
    """Load the ``client_data`` section of one recorded game response.

    Reads a stored response JSON file located under ``config.GAME_FILES_DIR``
    and returns the value of its top-level ``"client_data"`` key.

    :return: The ``client_data`` object of the recorded response.
    :raises OSError: If the recorded response file cannot be opened.
    :raises KeyError: If the response has no ``"client_data"`` key.
    """
    # TODO make generic
    resp_path = f"{config.GAME_FILES_DIR}/fc3b1f5a-296d-4cd0-a560-cfa5a6f8d302/000000_decision_accept_active/000000_response.json"
    # Decode the file exactly once: a second read() on the same handle
    # returns an empty string, and str(dict) is not valid JSON.
    with open(resp_path, "r", encoding="utf-8") as file:
        return json.load(file)["client_data"]
|
7
tests/test_parser.py
Normal file
7
tests/test_parser.py
Normal file
@ -0,0 +1,7 @@
|
||||
from tests.dummy import dummy_client_data
|
||||
from utils.parsers.passport_parser import process_passport
|
||||
|
||||
|
||||
def test_passport_parser() -> None:
    """Smoke test: run the passport parser on the sample client's passport."""
    passport_b64 = dummy_client_data().get("passport")
    process_passport(passport_b64)
|
@ -1,22 +1,38 @@
|
||||
import base64
|
||||
from pdf2image import convert_from_bytes
|
||||
import pytesseract
|
||||
|
||||
import pymupdf
|
||||
|
||||
def process_account(account_b64: str) -> str:
    """Extract the text and form-field values of a base64-encoded account PDF.

    Processing steps:
    - Decode the base64 payload into PDF bytes.
    - Open the PDF with PyMuPDF directly from memory.
    - For every page, collect the page text and the name/value of each form
      widget found on that page.

    :param account_b64: Base64 string representing the PDF.
    :return: One section per page, joined with newlines. Each section starts
        with a ``--- Page N ---`` header followed by the page text and, when
        the page has form widgets, a ``Form Fields:`` list.
    """
    pdf_bytes = base64.b64decode(account_b64)

    page_sections = []
    # The context manager closes the document even if a page fails to parse.
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            section = f"--- Page {page_number} ---\n{page.get_text()}"

            form_fields_text = [
                f"Field: {field.field_name}, Value: {field.field_value}"
                for field in page.widgets()
            ]
            if form_fields_text:
                section += "\n\nForm Fields:\n" + "\n".join(form_fields_text)

            # Accumulate per page so earlier pages are not overwritten.
            page_sections.append(section)

    # Join the list of sections; joining a single str would insert a newline
    # between every character.
    return "\n".join(page_sections)
|
@ -1,8 +1,10 @@
|
||||
import base64
|
||||
import io
|
||||
from tempfile import NamedTemporaryFile
|
||||
from PIL import Image
|
||||
import pytesseract
|
||||
|
||||
from passporteye import read_mrz
|
||||
import json
|
||||
|
||||
def process_passport(passport_b64: str) -> str:
    """Read the machine-readable zone (MRZ) of a base64-encoded passport image.

    The payload is decoded and handed to ``read_mrz`` as an in-memory binary
    stream, so no temporary file is needed.

    :param passport_b64: Base64 string representing the passport image.
    :return: The value returned by ``read_mrz`` for the decoded image.
    """
    image_bytes = base64.b64decode(passport_b64)

    # Pass an in-memory stream instead of a NamedTemporaryFile: the temp file
    # was reopened by name without being flushed (the reader could see
    # truncated or empty data), and reopening it while still open fails on
    # Windows.
    mrz = read_mrz(io.BytesIO(image_bytes))

    # NOTE(review): the signature promises ``str`` but read_mrz presumably
    # returns an MRZ object (or None when nothing is detected) - confirm.
    # TODO CONTINUE: serialize the MRZ result into text.
    return mrz
|
Reference in New Issue
Block a user