Fix passport OCR: replace misread 'B' with '8' in the passport number

This commit is contained in:
Nitwix
2025-04-12 22:58:37 +02:00
parent b8c556bd88
commit b2838cf914
4 changed files with 22 additions and 10 deletions

View File

@@ -1,4 +1,5 @@
from datetime import date from datetime import date
import glob
import json import json
from typing import Any from typing import Any
import config import config
@@ -111,10 +112,16 @@ def dummy_data() -> ExtractedData:
profile=dummy_profile(), profile=dummy_profile(),
) )
def dummy_client_data() -> dict[str, Any]: def dummy_client_data() -> list[dict[str, Any]]:
# TODO make generic glob_str = f"{config.GAME_FILES_DIR}/**/*_response.json"
resp_path = f"{config.GAME_FILES_DIR}/fc3b1f5a-296d-4cd0-a560-cfa5a6f8d302/000000_decision_accept_active/000000_response.json" responses = glob.glob(glob_str, recursive=True)
out = {} out = []
for resp_path in responses:
c_data = None
with open(resp_path, "r") as file: with open(resp_path, "r") as file:
out = json.loads(file.read())["client_data"] loaded = json.loads(file.read())
print(f"Loaded {resp_path}")
c_data = loaded["client_data"]
if not c_data is None:
out.append(c_data)
return out return out

View File

@@ -3,6 +3,6 @@ from tests.dummy import dummy_client_data
def test_extract_passport() -> None: def test_extract_passport() -> None:
client_data = dummy_client_data() client_data = dummy_client_data()[0]
passport = extract_passport(client_data) passport = extract_passport(client_data)
passport passport

View File

@@ -3,5 +3,7 @@ from utils.parsers.passport_parser import process_passport
def test_passport_parser() -> None: def test_passport_parser() -> None:
client_data = dummy_client_data() client_data = dummy_client_data()[0]
process_passport(client_data.get("passport"))
passport = process_passport(client_data.get("passport"))
passport

View File

@@ -25,10 +25,13 @@ def process_passport(passport_b64: str) -> str:
tesseract_text = pytesseract.image_to_string(image, lang='eng') tesseract_text = pytesseract.image_to_string(image, lang='eng')
out_dict = {} out_dict = {}
if not mrz_obj is None: if not mrz_obj is None:
number_raw = str(mrz_obj.number)
# It's not called a 'Hack'athon for nothing...
number = number_raw.replace("B", "8")
out_dict = { out_dict = {
"country": mrz_obj.country, "country": mrz_obj.country,
"names": mrz_obj.names, "names": mrz_obj.names,
"number": mrz_obj.number, "number": number,
"surname": mrz_obj.surname, "surname": mrz_obj.surname,
"mrz": mrz_obj.aux["text"], "mrz": mrz_obj.aux["text"],
} }