diff --git a/.gitignore b/.gitignore index 1fd7de5..ef0cc4f 100644 --- a/.gitignore +++ b/.gitignore @@ -273,3 +273,5 @@ devenv.lock # pre-commit .pre-commit-config.yaml + +.vscode \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 52d6bbe..f96da76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ urllib3==2.4.0 pydantic==2.11.3 langchain==0.3.23 langchain-groq==0.3.2 +email-validator==2.2.0 \ No newline at end of file diff --git a/tests/test_cross_validate.py b/tests/test_cross_validate.py index 9457c6e..8ecd26b 100644 --- a/tests/test_cross_validate.py +++ b/tests/test_cross_validate.py @@ -1,4 +1,9 @@ -from validation import FromAccount +from datetime import date +from validation.cross_validate import ExtractedData +from validation.from_account import FromAccount +from validation.from_description import FromDescription +from validation.from_passport import FromPassport +from validation.from_profile import FromProfile account_data = FromAccount( @@ -15,5 +20,88 @@ account_data = FromAccount( country="Netherlands", name="Astrid Janneke Willems", phone_number="+31 06 34579996", - email="astrid.willems@upcmail.nl" -) \ No newline at end of file + email="astrid.willems@upcmail.nl", +) + +description_data = FromDescription( + full_name="Astrid Janneke Willems", + age=28, + nationality="Netherlands", + marital_status="single", + has_children=False, + secondary_education_school="Pieter Nieuwland College Utrecht", + secondary_education_year=2016, + university_name="Webster University Leiden", + university_graduation_year=2020, + occupation_title="Art Dealer", + employer="Rijksmuseum Amsterdam", + start_year=2021, + annual_salary_eur=40000, + total_savings_eur=20000, + has_properties=False, + inheritance_amount_eur=1590000, + inheritance_year=2020, + inheritance_source="grandmother (Oil and Gas Executive)", +) + +passport_data = FromPassport( + country="NLD", + passport_number="HW8642009", + surname="WILLEMS", + given_names="ASTRID JANNEKE", + birth_date=date(1997, 1, 19), + citizenship="Austrian/ÖSTERREICH", + sex="F", + issue_date=date(2016, 6, 4), + expiry_date=date(2026, 6, 3), + signature_present=True, + machine_readable_zone="P None: + data = ExtractedData( + account=account_data, + description=description_data, + passport=passport_data, + profile=profile_data, + ) diff --git a/validation/cross_validate.py b/validation/cross_validate.py index 928c460..78fe225 100644 --- a/validation/cross_validate.py +++ b/validation/cross_validate.py @@ -2,7 +2,11 @@ from enum import StrEnum from typing import Any, Callable, Optional from pydantic import BaseModel -from validation import FromAccount, FromDescription, FromPassport, FromProfile +from validation.from_account import FromAccount +from validation.from_description import FromDescription +from validation.from_passport import FromPassport +from validation.from_profile import FromProfile + class ExtractedData(BaseModel): diff --git a/validation/FromAccount.py b/validation/from_account.py similarity index 59% rename from validation/FromAccount.py rename to validation/from_account.py index 91307c8..5e91d4d 100644 --- a/validation/FromAccount.py +++ b/validation/from_account.py @@ -9,9 +9,9 @@ class FromAccount(BaseModel): model_config = ConfigDict(validate_assignment=True, str_strip_whitespace=True) # From account.pdf - account_name: str = Field(..., min_length=1) - account_holder_name: str = Field(..., min_length=1) - account_holder_surname: str = Field(..., min_length=1) + account_name: str = Field(min_length=1) + account_holder_name: str = Field(min_length=1) + account_holder_surname: str = Field(min_length=1) @model_validator(mode='after') def check_account_name_is_name_surname(self) -> Self: @@ -20,17 +20,17 @@ class FromAccount(BaseModel): raise ValueError(f'Account name is not name + surname: {self.account_name} != {combined}') return self - passport_number: str = Field(..., min_length=5) + passport_number: str = Field(min_length=5) reference_currency: Literal["CHF", "EUR", "USD", "Other"] other_currency: Optional[str] = None - building_number: str = Field(..., min_length=1) - street_name: str = Field(..., min_length=1) - postal_code: str = Field(..., min_length=1) - city: str = Field(..., min_length=1) - country: str = Field(..., min_length=1) + building_number: str = Field(min_length=1) + street_name: str = Field(min_length=1) + postal_code: str = Field(min_length=1) + city: str = Field(min_length=1) + country: str = Field(min_length=1) - name: str = Field(..., min_length=1) - phone_number: str = Field(..., min_length=6) + name: str = Field(min_length=1) + phone_number: str = Field(min_length=6) email: EmailStr \ No newline at end of file diff --git a/validation/FromDescription.py b/validation/from_description.py similarity index 100% rename from validation/FromDescription.py rename to validation/from_description.py diff --git a/validation/FromPassport.py b/validation/from_passport.py similarity index 96% rename from validation/FromPassport.py rename to validation/from_passport.py index 773d135..3cf3554 100644 --- a/validation/FromPassport.py +++ b/validation/from_passport.py @@ -10,7 +10,7 @@ class FromPassport(BaseModel): model_config = ConfigDict(validate_assignment=True, str_strip_whitespace=True) country: str = Field(..., min_length=3, max_length=3) # ISO 3166-1 alpha-3 - passport_number: str = Field(..., min_length=9, max_length=9, regex=r"^[A-Z0-9]{9}$") + passport_number: str = Field(..., min_length=9, max_length=9, pattern=r"^[A-Z0-9]{9}$") surname: str = Field(..., min_length=1) given_names: str = Field(..., min_length=1) diff --git a/validation/FromProfile.py b/validation/from_profile.py similarity index 98% rename from validation/FromProfile.py rename to validation/from_profile.py index 64abbee..40ae4e5 100644 --- a/validation/FromProfile.py +++ b/validation/from_profile.py @@ -17,7 +17,7 @@ class FromProfile(BaseModel): gender: Literal["Female", "Male"] # ID information - passport_number: str = Field(..., min_length=9, max_length=9, regex=r"^[A-Z0-9]{9}$") + passport_number: str = Field(..., min_length=9, max_length=9, pattern=r"^[A-Z0-9]{9}$") id_type: Literal["passport"] id_issue_date: date id_expiry_date: date