From fc98b1bb705329d34233f5ad10e8f9c3dbd37d53 Mon Sep 17 00:00:00 2001 From: Nitwix Date: Sat, 12 Apr 2025 14:20:40 +0200 Subject: [PATCH] Add some validation rules --- tests/dummy.py | 109 ++++++++++++++++++++++++++++++++ tests/test_cross_validate.py | 116 ++++------------------------------- tests/test_validation.py | 12 ++++ validation/cross_validate.py | 14 ++++- validation/from_passport.py | 10 ++- validation/llm_validate.py | 2 + 6 files changed, 154 insertions(+), 109 deletions(-) create mode 100644 tests/dummy.py create mode 100644 tests/test_validation.py create mode 100644 validation/llm_validate.py diff --git a/tests/dummy.py b/tests/dummy.py new file mode 100644 index 0000000..c8836fd --- /dev/null +++ b/tests/dummy.py @@ -0,0 +1,109 @@ +from datetime import date +from validation.cross_validate import ExtractedData +from validation.from_account import FromAccount +from validation.from_description import FromDescription +from validation.from_passport import FromPassport +from validation.from_profile import FromProfile + + +def dummy_account() -> FromAccount: + return FromAccount( + account_name="Astrid Janneke Willems", + account_holder_name="Astrid Janneke", + account_holder_surname="Willems", + passport_number="HW8642009", + reference_currency="EUR", + other_currency=None, + building_number="18", + street_name="Lijnbaan", + postal_code="7523 05", + city="Assen", + country="Netherlands", + name="Astrid Janneke Willems", + phone_number="+31 06 34579996", + email="astrid.willems@upcmail.nl", + ) + +def dummy_description() -> FromDescription: + return FromDescription( + full_name="Astrid Janneke Willems", + age=28, + nationality="Netherlands", + marital_status="single", + has_children=False, + secondary_education_school="Pieter Nieuwland College Utrecht", + secondary_education_year=2016, + university_name="Webster University Leiden", + university_graduation_year=2020, + occupation_title="Art Dealer", + employer="Rijksmuseum Amsterdam", + start_year=2021, + annual_salary_eur=40000, + total_savings_eur=20000, + has_properties=False, + inheritance_amount_eur=1590000, + inheritance_year=2020, + inheritance_source="grandmother (Oil and Gas Executive)", + ) +def dummy_passport() -> FromPassport: + return FromPassport( + country="NLD", + passport_number="HW8642009", + surname="WILLEMS", + given_names="ASTRID JANNEKE", + birth_date=date(1997, 1, 19), + citizenship="Austrian/ÖSTERREICH", + sex="F", + issue_date=date(2016, 6, 4), + expiry_date=date(2026, 6, 3), + signature_present=True, + machine_readable_zone="P FromProfile: + return FromProfile( + first_name="Astrid Janneke", + last_name="Willems", + date_of_birth=date(1997, 1, 19), + nationality="Dutch", + country_of_domicile="Netherlands", + gender="Female", + passport_number="HW8642009", + id_type="passport", + id_issue_date=date(2016, 6, 4), + id_expiry_date=date(2026, 6, 3), + phone="+31 06 34579996", + email="astrid.willems@upcmail.nl", + address="Lijnbaan 18, 7523 05 Assen", + politically_exposed_person=False, + marital_status="Single", + highest_education="Tertiary", + education_history="Webster University Leiden (2020)", + employment_status="Employee", + employment_since=2021, + employer="Rijksmuseum Amsterdam", + position="Art Dealer", + annual_salary_eur=40000.0, + total_wealth_range="1.5m-5m", + origin_of_wealth=["Employment", "Inheritance"], + inheritance_details="Grandmother, 2020, Oil and Gas Executive", + business_assets_eur=20000.0, + estimated_annual_income="<250k", + income_country="Netherlands", + commercial_account=False, + investment_risk_profile="High", + mandate_type="Advisory", + investment_experience="Experienced", + investment_horizon="Medium", + preferred_markets=["Denmark", "Netherlands"], + total_aum=1610000.0, + aum_to_transfer=1320200.0, + ) + +def dummy_data() -> ExtractedData: + return ExtractedData( + account=dummy_account(), + description=dummy_description(), + passport=dummy_passport(), + profile=dummy_profile(), + ) \ No newline at end of file diff --git a/tests/test_cross_validate.py b/tests/test_cross_validate.py index 7062b12..c006a41 100644 --- a/tests/test_cross_validate.py +++ b/tests/test_cross_validate.py @@ -1,110 +1,16 @@ -from datetime import date -from validation.cross_validate import ExtractedData, xval_name_account_description -from validation.from_account import FromAccount -from validation.from_description import FromDescription -from validation.from_passport import FromPassport -from validation.from_profile import FromProfile +from tests.dummy import dummy_data +from validation.cross_validate import xval_all, xval_email_account_profile, xval_name_account_description -account_data = FromAccount( - account_name="Astrid Janneke Willems", - account_holder_name="Astrid Janneke", - account_holder_surname="Willems", - passport_number="HW8642009", - reference_currency="EUR", - other_currency=None, - building_number="18", - street_name="Lijnbaan", - postal_code="7523 05", - city="Assen", - country="Netherlands", - name="Astrid Janneke Willems", - phone_number="+31 06 34579996", - email="astrid.willems@upcmail.nl", -) - -description_data = FromDescription( - full_name="Astrid Janneke Willems", - age=28, - nationality="Netherlands", - marital_status="single", - has_children=False, - secondary_education_school="Pieter Nieuwland College Utrecht", - secondary_education_year=2016, - university_name="Webster University Leiden", - university_graduation_year=2020, - occupation_title="Art Dealer", - employer="Rijksmuseum Amsterdam", - start_year=2021, - annual_salary_eur=40000, - total_savings_eur=20000, - has_properties=False, - inheritance_amount_eur=1590000, - inheritance_year=2020, - inheritance_source="grandmother (Oil and Gas Executive)", -) - -passport_data = FromPassport( - country="NLD", - passport_number="HW8642009", - surname="WILLEMS", - given_names="ASTRID JANNEKE", - birth_date=date(1997, 1, 19), - citizenship="Austrian/ÖSTERREICH", - sex="F", - issue_date=date(2016, 6, 4), - expiry_date=date(2026, 6, 3), - signature_present=True, - machine_readable_zone="P None: - data = ExtractedData( - account=account_data, - description=description_data, - passport=passport_data, - profile=profile_data, - ) - - failure = xval_name_account_description(data) + failure = xval_name_account_description(dummy_data()) assert failure is None + +def test_xval_email_account_profile() -> None: + failure = xval_email_account_profile(dummy_data()) + assert failure is None + +def test_xval_all() -> None: + failures = xval_all(dummy_data()) + assert len(failures) == 0 \ No newline at end of file diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 0000000..48407fe --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,12 @@ +from datetime import date + +from pydantic import ValidationError +import pytest +from tests.dummy import dummy_passport +from validation.from_passport import FromPassport + + +def test_check_expiry_date_after_issue_date() -> None: + dummy = dummy_passport() + with pytest.raises(ValidationError): + dummy.expiry_date = date(1900, 1, 1) \ No newline at end of file diff --git a/validation/cross_validate.py b/validation/cross_validate.py index 2e30003..c5a1b8f 100644 --- a/validation/cross_validate.py +++ b/validation/cross_validate.py @@ -50,10 +50,20 @@ def xval_email_account_profile(data: ExtractedData) -> Optional[XValFailure]: doc2_val=f"{data.profile.email=}" ) +def xval_passport_no_account_passport(data: ExtractedData) -> Optional[XValFailure]: + if data.account.passport_number != data.passport.passport_number: + return XValFailure( + doc1_type=DocType.account, + doc1_val=f"{data.account.passport_number=}", + doc2_type=DocType.passport, + doc2_val=f"{data.passport.passport_number=}" + ) -def xref_all(data: ExtractedData) -> list[XValFailure]: +def xval_all(data: ExtractedData) -> list[XValFailure]: xref_validators: list[Callable[[ExtractedData], Optional[XValFailure]]] = [ - xval_name_account_description + xval_name_account_description, + xval_email_account_profile, + xval_passport_no_account_passport ] validation_failures = [] diff --git a/validation/from_passport.py b/validation/from_passport.py index 3cf3554..89946cc 100644 --- a/validation/from_passport.py +++ b/validation/from_passport.py @@ -1,6 +1,6 @@ from datetime import date -from typing import Literal -from pydantic import BaseModel, ConfigDict, Field +from typing import Literal, Self +from pydantic import BaseModel, ConfigDict, Field, model_validator class FromPassport(BaseModel): @@ -22,6 +22,12 @@ class FromPassport(BaseModel): issue_date: date expiry_date: date + @model_validator(mode='after') + def check_expiry_date_after_issue_date(self) -> Self: + if self.issue_date >= self.expiry_date: + raise ValueError(f'Expiry date is not after issue date') + return self + signature_present: bool machine_readable_zone: str = Field(..., min_length=44) \ No newline at end of file diff --git a/validation/llm_validate.py b/validation/llm_validate.py new file mode 100644 index 0000000..d0d9d8e --- /dev/null +++ b/validation/llm_validate.py @@ -0,0 +1,2 @@ +# TODO +# account.reference_currency corresponds to passport.country \ No newline at end of file