From e96549604c2b4664ca228099d2f0123ee76e6ae0 Mon Sep 17 00:00:00 2001
From: Nitwix
Date: Sat, 12 Apr 2025 11:55:18 +0200
Subject: [PATCH] Skeleton for final data validation and xref

---
 dto/client_data.py                 | 10 -----
 dto/client_data/FromAccount.py     | 36 +++++++++++++++++
 dto/client_data/FromDescription.py | 35 ++++++++++++++++
 dto/client_data/FromPassport.py    | 27 +++++++++++++
 dto/client_data/FromProfile.py     | 65 ++++++++++++++++++++++++++++++
 dto/client_data/__init__.py        |  0
 dto/client_data/cross_validate.py  | 65 ++++++++++++++++++++++++++++++
 7 files changed, 228 insertions(+), 10 deletions(-)
 delete mode 100644 dto/client_data.py
 create mode 100644 dto/client_data/FromAccount.py
 create mode 100644 dto/client_data/FromDescription.py
 create mode 100644 dto/client_data/FromPassport.py
 create mode 100644 dto/client_data/FromProfile.py
 create mode 100644 dto/client_data/__init__.py
 create mode 100644 dto/client_data/cross_validate.py

diff --git a/dto/client_data.py b/dto/client_data.py
deleted file mode 100644
index e47274f..0000000
--- a/dto/client_data.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from pydantic import BaseModel
-
-class ClientData(BaseModel):
-    """
-    Model for the client data attributes which need to be validated and compared for correspondence between
-    the data sources ()
-    """
-    name: str
-
-    # TODO CONTINUE
\ No newline at end of file
diff --git a/dto/client_data/FromAccount.py b/dto/client_data/FromAccount.py
new file mode 100644
index 0000000..91307c8
--- /dev/null
+++ b/dto/client_data/FromAccount.py
@@ -0,0 +1,36 @@
+from typing import Literal, Optional, Self
+from pydantic import BaseModel, ConfigDict, EmailStr, Field, model_validator
+
+
+class FromAccount(BaseModel):
+    """
+    Fields which can be extracted from account.pdf
+    """
+    model_config = ConfigDict(validate_assignment=True, str_strip_whitespace=True)
+
+    # From account.pdf
+    account_name: str = Field(..., min_length=1)
+    account_holder_name: str = Field(..., min_length=1)
+    account_holder_surname: str = Field(..., min_length=1)
+
+    @model_validator(mode='after')
+    def check_account_name_is_name_surname(self) -> Self:
+        combined = f"{self.account_holder_name} {self.account_holder_surname}"
+        if combined != self.account_name:
+            raise ValueError(f'Account name is not name + surname: {self.account_name} != {combined}')
+        return self
+
+    passport_number: str = Field(..., min_length=5)
+
+    reference_currency: Literal["CHF", "EUR", "USD", "Other"]
+    other_currency: Optional[str] = None
+
+    building_number: str = Field(..., min_length=1)
+    street_name: str = Field(..., min_length=1)
+    postal_code: str = Field(..., min_length=1)
+    city: str = Field(..., min_length=1)
+    country: str = Field(..., min_length=1)
+
+    name: str = Field(..., min_length=1)
+    phone_number: str = Field(..., min_length=6)
+    email: EmailStr
\ No newline at end of file
diff --git a/dto/client_data/FromDescription.py b/dto/client_data/FromDescription.py
new file mode 100644
index 0000000..e19420e
--- /dev/null
+++ b/dto/client_data/FromDescription.py
@@ -0,0 +1,35 @@
+from typing import Literal, Optional
+from pydantic import BaseModel, ConfigDict, EmailStr, Field
+
+
+class FromDescription(BaseModel):
+    """
+    Fields which can be extracted from description.txt
+    """
+    model_config = ConfigDict(validate_assignment=True, str_strip_whitespace=True)
+
+
+    full_name: str = Field(..., min_length=1)
+    age: int = Field(..., ge=0, le=120)
+    nationality: str = Field(..., min_length=1)
+
+    marital_status: Literal["single", "married", "divorced", "widowed"]
+    has_children: bool
+
+    secondary_education_school: str
+    secondary_education_year: int = Field(..., ge=1900, le=2100)
+    university_name: str
+    university_graduation_year: int = Field(..., ge=1900, le=2100)
+
+    occupation_title: str
+    employer: str
+    start_year: int = Field(..., ge=1900, le=2100)
+    annual_salary_eur: float = Field(..., ge=0)
+
+    total_savings_eur: float = Field(..., ge=0)
+    has_properties: bool
+
+    inheritance_amount_eur: float = Field(..., ge=0)
+    inheritance_year: int = Field(..., ge=1900, le=2100)
+    inheritance_source: str
+
diff --git a/dto/client_data/FromPassport.py b/dto/client_data/FromPassport.py
new file mode 100644
index 0000000..773d135
--- /dev/null
+++ b/dto/client_data/FromPassport.py
@@ -0,0 +1,27 @@
+from datetime import date
+from typing import Literal
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class FromPassport(BaseModel):
+    """
+    Fields which can be extracted from the passport
+    """
+    model_config = ConfigDict(validate_assignment=True, str_strip_whitespace=True)
+
+    country: str = Field(..., min_length=3, max_length=3)  # ISO 3166-1 alpha-3
+    passport_number: str = Field(..., min_length=9, max_length=9, pattern=r"^[A-Z0-9]{9}$")
+
+    surname: str = Field(..., min_length=1)
+    given_names: str = Field(..., min_length=1)
+
+    birth_date: date
+    citizenship: str = Field(..., min_length=2)
+    sex: Literal["M", "F"]
+
+    issue_date: date
+    expiry_date: date
+
+    signature_present: bool
+
+    machine_readable_zone: str = Field(..., min_length=44)
\ No newline at end of file
diff --git a/dto/client_data/FromProfile.py b/dto/client_data/FromProfile.py
new file mode 100644
index 0000000..64abbee
--- /dev/null
+++ b/dto/client_data/FromProfile.py
@@ -0,0 +1,65 @@
+from datetime import date
+from typing import List, Literal, Optional
+from pydantic import BaseModel, ConfigDict, EmailStr, Field
+
+
+class FromProfile(BaseModel):
+    """
+    Fields which can be extracted from the client profile
+    """
+    model_config = ConfigDict(validate_assignment=True, str_strip_whitespace=True)
+
+    first_name: str = Field(..., min_length=1)
+    last_name: str = Field(..., min_length=1)
+    date_of_birth: date
+    nationality: str
+    country_of_domicile: str
+    gender: Literal["Female", "Male"]
+
+    # ID information
+    passport_number: str = Field(..., min_length=9, max_length=9, pattern=r"^[A-Z0-9]{9}$")
+    id_type: Literal["passport"]
+    id_issue_date: date
+    id_expiry_date: date
+
+    # Contact
+    phone: str = Field(..., min_length=8)
+    email: EmailStr
+    address: str
+
+    # Personal info
+    politically_exposed_person: bool
+    marital_status: Literal["Single", "Married", "Divorced", "Widowed"]
+    highest_education: Literal["Tertiary", "Secondary", "Primary", "None"]
+    education_history: Optional[str] = None
+
+    # Employment
+    employment_status: Literal["Employee", "Self-Employed", "Unemployed", "Retired", "Student", "Diplomat", "Military", "Homemaker", "Other"]
+    employment_since: Optional[int] = None
+    employer: Optional[str] = None
+    position: Optional[str] = None
+    annual_salary_eur: Optional[float] = None
+
+    # Wealth background
+    total_wealth_range: Literal["<1.5m", "1.5m-5m", "5m-10m", "10m-20m", "20m-50m", ">50m"]
+    origin_of_wealth: List[Literal["Employment", "Inheritance", "Business", "Investments", "Sale of real estate", "Retirement package", "Other"]]
+    inheritance_details: Optional[str] = None
+
+    # Assets
+    business_assets_eur: float = Field(..., ge=0)
+
+    # Income
+    estimated_annual_income: Literal["<250k", "250k-500k", "500k-1m", ">1m"]
+    income_country: str
+
+    # Account preferences
+    commercial_account: bool
+    investment_risk_profile: Literal["Low", "Moderate", "Considerable", "High"]
+    mandate_type: Literal["Advisory", "Discretionary"]
+    investment_experience: Literal["Inexperienced", "Experienced", "Expert"]
+    investment_horizon: Literal["Short", "Medium", "Long-Term"]
+    preferred_markets: List[str]
+
+    # Assets under management
+    total_aum: float
+    aum_to_transfer: float
\ No newline at end of file
diff --git a/dto/client_data/__init__.py b/dto/client_data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dto/client_data/cross_validate.py b/dto/client_data/cross_validate.py
new file mode 100644
index 0000000..e545e40
--- /dev/null
+++ b/dto/client_data/cross_validate.py
@@ -0,0 +1,65 @@
+from enum import StrEnum
+from typing import Any, Callable
+from dto.client_data.FromAccount import FromAccount
+from dto.client_data.FromDescription import FromDescription
+from dto.client_data.FromPassport import FromPassport
+from dto.client_data.FromProfile import FromProfile
+from pydantic import BaseModel
+
+
+class ValidatedData(BaseModel):
+    """Validated data from each of the four client documents."""
+    account: FromAccount
+    description: FromDescription
+    passport: FromPassport
+    profile: FromProfile
+
+
+class DocType(StrEnum):
+    """Client document a cross-referenced value was read from."""
+    account = "account"
+    description = "description"
+    passport = "passport"
+    profile = "profile"
+
+
+class ValidationFailure(BaseModel):
+    """A mismatch between the values that two documents give for the same attribute."""
+    doc1_type: DocType
+    doc1_val: str
+
+    doc2_type: DocType
+    doc2_val: str
+
+
+def xref_client_name(data: ValidatedData) -> ValidationFailure | None:
+    """
+    Check that the account name matches the full name given in the description.
+
+    Returns the ValidationFailure describing the mismatch, or None when the names agree.
+    """
+    # account_name is "<name> <surname>" (enforced by FromAccount), so it is the
+    # account field to compare against a full name, not account_holder_name alone.
+    if data.account.account_name != data.description.full_name:
+        return ValidationFailure(
+            doc1_type=DocType.account, doc1_val=f"{data.account.account_name=}",
+            doc2_type=DocType.description, doc2_val=f"{data.description.full_name=}"
+        )
+    # TODO CONTINUE
+    return None
+
+
+def xref_all(data: ValidatedData) -> list[ValidationFailure]:
+    """
+    Run every cross-reference validator and collect the failures they report.
+
+    Validators return None when their check passes; those results are skipped.
+    """
+    xref_validators: list[Callable[[ValidatedData], ValidationFailure | None]] = [xref_client_name]
+
+    validation_failures = []
+    for validator in xref_validators:
+        failure = validator(data)
+        if failure is not None:
+            validation_failures.append(failure)
+    return validation_failures
\ No newline at end of file