From b2838cf914de13907cfe34543e1c699f7b9c92d8 Mon Sep 17 00:00:00 2001 From: Nitwix Date: Sat, 12 Apr 2025 22:58:37 +0200 Subject: [PATCH] Fix passport OCR B -> 8 --- tests/dummy.py | 19 +++++++++++++------ tests/test_extraction.py | 2 +- tests/test_parser.py | 6 ++++-- utils/parsers/passport_parser.py | 5 ++++- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/dummy.py b/tests/dummy.py index 87c43f8..48f32b3 100644 --- a/tests/dummy.py +++ b/tests/dummy.py @@ -1,4 +1,5 @@ from datetime import date +import glob import json from typing import Any import config @@ -111,10 +112,16 @@ def dummy_data() -> ExtractedData: profile=dummy_profile(), ) -def dummy_client_data() -> dict[str, Any]: - # TODO make generic - resp_path = f"{config.GAME_FILES_DIR}/fc3b1f5a-296d-4cd0-a560-cfa5a6f8d302/000000_decision_accept_active/000000_response.json" - out = {} - with open(resp_path, "r") as file: - out = json.loads(file.read())["client_data"] +def dummy_client_data() -> list[dict[str, Any]]: + glob_str = f"{config.GAME_FILES_DIR}/**/*_response.json" + responses = glob.glob(glob_str, recursive=True) + out = [] + for resp_path in responses: + c_data = None + with open(resp_path, "r") as file: + loaded = json.loads(file.read()) + print(f"Loaded {resp_path}") + c_data = loaded["client_data"] + if not c_data is None: + out.append(c_data) return out \ No newline at end of file diff --git a/tests/test_extraction.py b/tests/test_extraction.py index ef05ad0..1e1a4da 100644 --- a/tests/test_extraction.py +++ b/tests/test_extraction.py @@ -3,6 +3,6 @@ from tests.dummy import dummy_client_data def test_extract_passport() -> None: - client_data = dummy_client_data() + client_data = dummy_client_data()[0] passport = extract_passport(client_data) passport \ No newline at end of file diff --git a/tests/test_parser.py b/tests/test_parser.py index db98168..20acae5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -3,5 +3,7 @@ from utils.parsers.passport_parser import process_passport def test_passport_parser() -> None: - client_data = dummy_client_data() - process_passport(client_data.get("passport")) \ No newline at end of file + client_data = dummy_client_data()[0] + + passport = process_passport(client_data.get("passport")) + passport \ No newline at end of file diff --git a/utils/parsers/passport_parser.py b/utils/parsers/passport_parser.py index 1b392c2..58d2b6e 100644 --- a/utils/parsers/passport_parser.py +++ b/utils/parsers/passport_parser.py @@ -25,10 +25,13 @@ def process_passport(passport_b64: str) -> str: tesseract_text = pytesseract.image_to_string(image, lang='eng') out_dict = {} if not mrz_obj is None: + number_raw = str(mrz_obj.number) + # It's not called a 'Hack'athon for nothing... + number = number_raw.replace("B", "8") out_dict = { "country": mrz_obj.country, "names": mrz_obj.names, - "number": mrz_obj.number, + "number": number, "surname": mrz_obj.surname, "mrz": mrz_obj.aux["text"], }