Files
alpinebits_python/src/alpine_bits_python/schemas.py
2025-12-03 12:12:37 +01:00

616 lines
22 KiB
Python

"""Pydantic models for data validation in AlpineBits.
These models provide validation for data before it's passed to:
- SQLAlchemy database models
- AlpineBits XML generation
- API endpoints
Separating validation (Pydantic) from persistence (SQLAlchemy) and
from XML generation (xsdata) follows clean architecture principles.
"""
import hashlib
import json
from datetime import UTC, date, datetime
from enum import Enum
from typing import Any

from pydantic import (
    BaseModel,
    EmailStr,
    Field,
    ValidationInfo,
    field_validator,
    model_validator,
)

from .const import WebhookStatus
# Generalized integer validator for reuse across models
def convert_to_int(field_name: str, v: Any) -> int:
    """Convert a value to an integer, accepting ints and integer strings.

    Args:
        field_name: Name of the field being validated (used in error messages).
        v: Value to convert; an ``int`` or a ``str`` holding an integer.

    Returns:
        The value as an ``int``.

    Raises:
        ValueError: If ``v`` is ``None``, a ``bool``, a string that does not
            parse as an integer, or a value of any other type.
    """
    if v is None:
        msg = f"{field_name} cannot be None"
        raise ValueError(msg)
    # bool is a subclass of int, so it has to be rejected before the int
    # check; otherwise True/False would silently pass through as an ID.
    if isinstance(v, bool):
        msg = f"{field_name} must be int or str, got: {type(v)}"
        raise ValueError(msg)
    if isinstance(v, int):
        return v
    if isinstance(v, str):
        try:
            return int(v)
        except ValueError as e:
            msg = f"{field_name} must be a valid integer, got: {v}"
            raise ValueError(msg) from e
    msg = f"{field_name} must be int or str, got: {type(v)}"
    raise ValueError(msg)
# Country name to ISO 3166-1 alpha-2 code mapping.
# Keys are lowercase country names (plus the abbreviation "uk"), grouped by
# language; values are uppercase two-letter codes. A name that is spelled the
# same in several languages is listed once, under the first group it occurs in.
COUNTRY_NAME_TO_CODE = {
    # English names
    "germany": "DE",
    "italy": "IT",
    "austria": "AT",
    "switzerland": "CH",
    "france": "FR",
    "netherlands": "NL",
    "belgium": "BE",
    "spain": "ES",
    "portugal": "PT",
    "united kingdom": "GB",
    "uk": "GB",
    "czech republic": "CZ",
    "poland": "PL",
    "hungary": "HU",
    "croatia": "HR",
    "slovenia": "SI",
    # German names
    "deutschland": "DE",
    "italien": "IT",
    "österreich": "AT",
    "schweiz": "CH",
    "frankreich": "FR",
    "niederlande": "NL",
    "belgien": "BE",
    "spanien": "ES",
    "vereinigtes königreich": "GB",
    "tschechien": "CZ",
    "polen": "PL",
    "ungarn": "HU",
    "kroatien": "HR",
    "slowenien": "SI",
    # Italian names
    "germania": "DE",
    "italia": "IT",
    "svizzera": "CH",
    "francia": "FR",
    "paesi bassi": "NL",
    "belgio": "BE",
    "spagna": "ES",
    "portogallo": "PT",
    "regno unito": "GB",
    "repubblica ceca": "CZ",
    "polonia": "PL",
    "ungheria": "HU",
    "croazia": "HR",
}
class PhoneTechType(Enum):
    """Phone technology type codes: 1 = voice, 3 = fax, 5 = mobile."""

    VOICE = "1"
    FAX = "3"
    MOBILE = "5"
class PhoneNumber(BaseModel):
    """A phone number together with an optional technology type code."""

    # Digits, whitespace, hyphens and parentheses, optionally led by "+".
    number: str = Field(..., min_length=1, max_length=50, pattern=r"^\+?[0-9\s\-()]+$")
    # 1=voice, 3=fax, 5=mobile
    tech_type: str | None = Field(None, pattern="^[135]$")

    @field_validator("number")
    @classmethod
    def clean_phone_number(cls, v: str) -> str:
        """Collapse each run of whitespace to one space and trim both ends."""
        chunks = v.split()
        return " ".join(chunks)
class ReservationData(BaseModel):
    """Validated reservation data.

    After field validation two model-level checks run:

    - ``md5_unique_id`` is filled with the MD5 hex digest of ``unique_id``
      when it was not supplied.
    - ``children_ages`` must hold exactly ``num_children`` entries, each
      between 0 and 17.
    """

    unique_id: str = Field(..., min_length=1, max_length=200)
    md5_unique_id: str | None = Field(None, min_length=1, max_length=32)
    start_date: date
    end_date: date
    # NOTE(review): naive local timestamp, whereas ConversionData in this
    # module defaults to datetime.now(UTC) - confirm which one is intended.
    created_at: datetime = Field(default_factory=datetime.now)
    num_adults: int = Field(..., ge=1)
    num_children: int = Field(0, ge=0, le=10)
    children_ages: list[int] = Field(default_factory=list)
    hotel_id: str = Field(..., min_length=1, max_length=50)
    hotel_name: str | None = Field(None, max_length=200)
    offer: str | None = Field(None, max_length=500)
    user_comment: str | None = Field(None, max_length=2000)
    fbclid: str | None = Field(None, max_length=300)
    gclid: str | None = Field(None, max_length=300)
    # Advertising account IDs (populated conditionally based on fbclid/gclid)
    meta_account_id: str | None = Field(None, max_length=200)
    google_account_id: str | None = Field(None, max_length=200)
    utm_source: str | None = Field(None, max_length=150)
    utm_medium: str | None = Field(None, max_length=150)
    utm_campaign: str | None = Field(None, max_length=150)
    utm_term: str | None = Field(None, max_length=150)
    utm_content: str | None = Field(None, max_length=150)
    # RoomTypes fields (optional)
    room_type_code: str | None = Field(None, min_length=1, max_length=8)
    # Anchored so the whole value must be numeric; an unanchored pattern is
    # matched as a search and would accept values such as "abc1".
    room_classification_code: str | None = Field(None, pattern=r"^[0-9]+$")
    room_type: str | None = Field(None, pattern=r"^[1-5]$")

    @model_validator(mode="after")
    def ensure_md5(self) -> "ReservationData":
        """Fill ``md5_unique_id`` from ``unique_id`` when it was not provided.

        Runs in 'after' mode so the validated fields are available on the
        instance and the digest can be set in place.

        Returns:
            The same instance, with ``md5_unique_id`` populated.
        """
        if not self.md5_unique_id and self.unique_id:
            # MD5 is used here as a short fingerprint, not for security.
            digest = hashlib.md5(
                self.unique_id.encode("utf-8"), usedforsecurity=False
            )
            self.md5_unique_id = digest.hexdigest()
        return self

    @model_validator(mode="after")
    def validate_children_ages(self) -> "ReservationData":
        """Ensure ``children_ages`` matches ``num_children`` and is in range.

        Returns:
            The same instance when validation passes.

        Raises:
            ValueError: If the number of ages differs from ``num_children``
                or any age lies outside 0-17.
        """
        if len(self.children_ages) != self.num_children:
            raise ValueError(
                f"Number of children ages ({len(self.children_ages)}) "
                f"must match num_children ({self.num_children})"
            )
        for age in self.children_ages:
            if age < 0 or age > 17:
                raise ValueError(f"Child age {age} must be between 0 and 17")
        return self
class CustomerData(BaseModel):
    """Validated customer data for creating reservations and guests.

    Names are trimmed, ``country_code`` is normalized to an uppercase
    two-letter code and ``language`` is lowercased before its constraints
    are checked.
    """

    given_name: str = Field(..., min_length=1, max_length=100)
    surname: str = Field(..., min_length=1, max_length=100)
    name_prefix: str | None = Field(None, max_length=20)
    name_title: str | None = Field(None, max_length=20)
    phone_numbers: list[tuple[str, None | PhoneTechType]] = Field(default_factory=list)
    email_address: EmailStr | None = None
    email_newsletter: bool | None = None
    address_line: str | None = Field(None, max_length=255)
    city_name: str | None = Field(None, max_length=100)
    postal_code: str | None = Field(None, max_length=20)
    country_code: str | None = Field(None, min_length=2, max_length=2)
    address_catalog: bool | None = None
    gender: str | None = Field(None, pattern="^(Male|Female|Unknown)$")
    birth_date: str | None = Field(None, pattern=r"^\d{4}-\d{2}-\d{2}$")  # ISO format
    language: str | None = Field(None, min_length=2, max_length=2, pattern="^[a-z]{2}$")

    @field_validator("given_name", "surname")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Trim the name and reject values that are only whitespace.

        Raises:
            ValueError: If the name is empty after trimming.
        """
        if not v.strip():
            raise ValueError("Name cannot be empty or whitespace")
        return v.strip()

    @field_validator("country_code", mode="before")
    @classmethod
    def normalize_country_code(cls, v: str | None) -> str | None:
        """Normalize country input to an ISO 3166-1 alpha-2 code.

        Handles:
        - Country names in English, German, and Italian (case-insensitive)
        - Two-letter codes (case-insensitive)
        - None/empty values

        Runs in 'before' mode so the length constraints on the field apply to
        the normalized value. Input that is neither a known name nor a
        two-letter alphabetic string is returned unchanged and is then
        rejected by those constraints.

        Args:
            v: Country name or code.

        Returns:
            Uppercase two-letter code, the stripped input when it cannot be
            mapped, or None when the input is None/empty.
        """
        if not v:
            return None
        country_input = str(v).strip()
        if not country_input:
            return None
        # Look up known names and aliases first: a two-letter alias such as
        # "uk" must resolve to its ISO code ("GB") instead of being
        # upper-cased verbatim by the shortcut below.
        mapped_code = COUNTRY_NAME_TO_CODE.get(country_input.lower())
        if mapped_code is not None:
            return mapped_code
        # Any other two-letter alphabetic value is assumed to be a code already.
        iso_country_code_length = 2
        if len(country_input) == iso_country_code_length and country_input.isalpha():
            return country_input.upper()
        return country_input

    @field_validator("language", mode="before")
    @classmethod
    def normalize_language(cls, v: Any) -> Any:
        """Lowercase the language code before the field pattern is checked.

        Runs in 'before' mode: the field pattern only accepts lowercase
        letters, so normalizing afterwards would never see uppercase input.
        Non-string values are passed through for regular type validation.
        """
        return v.lower() if isinstance(v, str) else v

    model_config = {"from_attributes": True}  # Allow creation from ORM models
class HotelReservationIdData(BaseModel):
    """Validated hotel reservation ID data."""

    res_id_type: str = Field(..., pattern=r"^[0-9]+$")  # Must be numeric string
    res_id_value: str | None = Field(None, min_length=1, max_length=64)
    res_id_source: str | None = Field(None, min_length=1, max_length=64)
    res_id_source_context: str | None = Field(None, min_length=1, max_length=64)

    @field_validator(
        "res_id_value", "res_id_source", "res_id_source_context", mode="before"
    )
    @classmethod
    def trim_and_truncate(cls, v: str | None) -> str | None:
        """Clean an optional ID string before its length limits are checked.

        Falsy input and strings that are empty after trimming become None;
        anything else is converted to ``str``, stripped and cut to at most
        64 characters so the ``max_length`` constraint cannot fail.
        """
        if not v:
            return None
        cleaned = str(v).strip()
        # Slicing is a no-op for values that already fit.
        return cleaned[:64] if cleaned else None

    model_config = {"from_attributes": True}
class CommentListItemData(BaseModel):
    """Validated comment list item."""

    value: str = Field(..., min_length=1, max_length=1000)
    list_item: str = Field(..., pattern=r"^[0-9]+$")  # Numeric identifier
    language: str = Field(..., min_length=2, max_length=2, pattern=r"^[a-z]{2}$")

    @field_validator("language", mode="before")
    @classmethod
    def normalize_language(cls, v: Any) -> Any:
        """Lowercase the language code before the field pattern is checked.

        Runs in 'before' mode: the field pattern only accepts lowercase
        letters, so normalizing afterwards would never see uppercase input.
        Non-string values are passed through for regular type validation.
        """
        return v.lower() if isinstance(v, str) else v

    model_config = {"from_attributes": True}
class CommentData(BaseModel):
    """Validated comment data: a name, optional text and optional list items."""

    name: str  # Should be validated against CommentName2 enum
    text: str | None = Field(None, max_length=4000)
    list_items: list[CommentListItemData] = Field(default_factory=list)

    @field_validator("list_items")
    @classmethod
    def validate_list_items(
        cls, v: list[CommentListItemData]
    ) -> list[CommentListItemData]:
        """Reject list items that share a ``list_item`` identifier.

        Raises:
            ValueError: If two or more items carry the same identifier.
        """
        distinct_ids = {entry.list_item for entry in v}
        # Fewer distinct identifiers than items means at least one duplicate.
        if len(distinct_ids) != len(v):
            raise ValueError("List items must have unique identifiers")
        return v

    model_config = {"from_attributes": True}
class CommentsData(BaseModel):
    """Validated comments collection holding at most three comments."""

    comments: list[CommentData] = Field(default_factory=list, max_length=3)

    @field_validator("comments")
    @classmethod
    def validate_comment_count(cls, v: list[CommentData]) -> list[CommentData]:
        """Ensure no more than 3 comments are present.

        Raises:
            ValueError: If more than 3 comments are supplied.
        """
        max_comments = 3
        if len(v) <= max_comments:
            return v
        raise ValueError("Maximum 3 comments allowed")

    model_config = {"from_attributes": True}
class HotelData(BaseModel):
    """Validated hotel configuration data.

    ``hotel_id``, ``hotel_name`` and ``username`` are trimmed before their
    length constraints are evaluated.
    """

    hotel_id: str = Field(..., min_length=1, max_length=50)
    hotel_name: str = Field(..., min_length=1, max_length=200)
    username: str = Field(..., min_length=1, max_length=100)
    password_hash: str = Field(..., min_length=1, max_length=200)
    meta_account_id: str | None = Field(None, max_length=50)
    google_account_id: str | None = Field(None, max_length=50)
    push_endpoint_url: str | None = Field(None, max_length=500)
    push_endpoint_token: str | None = Field(None, max_length=200)
    push_endpoint_username: str | None = Field(None, max_length=100)
    # Naive local timestamps, evaluated once per instance.
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)
    is_active: bool = Field(default=True)

    @field_validator("hotel_id", "hotel_name", "username", mode="before")
    @classmethod
    def strip_whitespace(cls, v: Any) -> Any:
        """Remove leading/trailing whitespace before length validation.

        Runs in 'before' mode so that ``min_length``/``max_length`` are
        checked against the trimmed value: a whitespace-only input is
        rejected instead of being stored as an empty string, and padding
        does not count against the maximum length. Non-string values are
        passed through for regular type validation.
        """
        return v.strip() if isinstance(v, str) else v

    model_config = {"from_attributes": True}
class WebhookEndpointData(BaseModel):
    """Validated webhook endpoint configuration data.

    ``hotel_id``, ``webhook_secret`` and ``webhook_type`` are trimmed before
    their length constraints are evaluated.
    """

    hotel_id: str = Field(..., min_length=1, max_length=50)
    webhook_secret: str = Field(..., min_length=1, max_length=64)
    webhook_type: str = Field(..., min_length=1, max_length=50)
    description: str | None = Field(None, max_length=200)
    is_enabled: bool = Field(default=True)
    # Naive local timestamp, evaluated once per instance.
    created_at: datetime = Field(default_factory=datetime.now)

    @field_validator("hotel_id", "webhook_secret", "webhook_type", mode="before")
    @classmethod
    def strip_whitespace(cls, v: Any) -> Any:
        """Remove leading/trailing whitespace before length validation.

        Runs in 'before' mode so that ``min_length``/``max_length`` are
        checked against the trimmed value: a whitespace-only input is
        rejected instead of being stored as an empty string, and padding
        does not count against the maximum length. Non-string values are
        passed through for regular type validation.
        """
        return v.strip() if isinstance(v, str) else v

    model_config = {"from_attributes": True}
class WebhookRequestData(BaseModel):
    """Validated webhook request data.

    Special handling of the payload:
    - ``payload_json`` must be passed explicitly, but may be None (for example
      after the payload has been purged for privacy/storage reasons)
    - ``payload_hash`` is derived from ``payload_json`` when it is not given
    - validation fails when neither a hash nor a payload is available
    """

    # Required fields
    payload_json: dict[str, Any] | None = Field(
        ..., description="Webhook payload (required for creation, nullable after purge)"
    )
    # Auto-calculated from payload_json
    payload_hash: str | None = Field(
        None,
        min_length=64,
        max_length=64,
        description="SHA256 hash of canonical JSON payload (auto-calculated)",
    )
    # Optional foreign keys
    webhook_endpoint_id: int | None = Field(None, gt=0)
    hotel_id: str | None = Field(None, max_length=50)
    # Processing tracking
    status: WebhookStatus = Field(default=WebhookStatus.PENDING)
    processing_started_at: datetime | None = None
    processing_completed_at: datetime | None = None
    # Retry handling
    retry_count: int = Field(default=0, ge=0)
    last_error: str | None = Field(None, max_length=2000)
    # Payload metadata
    purged_at: datetime | None = None
    # Request metadata
    created_at: datetime = Field(default_factory=datetime.now)
    source_ip: str | None = Field(None, max_length=45)
    user_agent: str | None = Field(None, max_length=500)
    # Result tracking
    created_customer_id: int | None = Field(None, gt=0)
    created_reservation_id: int | None = Field(None, gt=0)

    @model_validator(mode="after")
    def calculate_payload_hash(self) -> "WebhookRequestData":
        """Derive ``payload_hash`` from ``payload_json`` when it is missing.

        The hash is the SHA256 hex digest of the UTF-8 encoded JSON dump with
        sorted keys. NOTE(review): meant to match the hashing done in api.py,
        which is not visible from this module - confirm they stay in sync.
        """
        # Nothing to do without a payload, or when a hash was supplied.
        if self.payload_json is None or self.payload_hash is not None:
            return self
        canonical = json.dumps(self.payload_json, sort_keys=True)
        digest = hashlib.sha256(canonical.encode("utf-8"))
        self.payload_hash = digest.hexdigest()
        return self

    @model_validator(mode="after")
    def validate_payload_hash_requirements(self) -> "WebhookRequestData":
        """Require ``payload_hash`` to be present, supplied or calculated.

        Defined after ``calculate_payload_hash`` so the hash has already been
        derived whenever a payload was available.

        Raises:
            ValueError: If ``payload_hash`` is still None.
        """
        if self.payload_hash is not None:
            return self
        raise ValueError(
            "payload_hash is required. It can be auto-calculated from payload_json "
            "or explicitly provided."
        )

    @field_validator("status", mode="before")
    @classmethod
    def normalize_status(cls, v: str | WebhookStatus) -> WebhookStatus:
        """Coerce a status string or enum member to ``WebhookStatus``.

        Raises:
            ValueError: If ``v`` is neither a ``WebhookStatus`` nor a string,
                or the string is not a valid status value.
        """
        if isinstance(v, WebhookStatus):
            return v
        if not isinstance(v, str):
            raise ValueError(f"Invalid webhook status: {v}")
        return WebhookStatus(v)

    model_config = {"from_attributes": True}
# Conversion models: guest and reservation records parsed from PMS conversion XML
class ConversionGuestData(BaseModel):
    """Validated conversion guest data from PMS XML.

    Validates guest records extracted from hotel PMS conversion XML files and
    fills in SHA256 hashes of the plain-text guest fields.
    """

    hotel_id: str = Field(..., min_length=1, max_length=50)
    guest_id: int = Field(..., gt=0)
    guest_first_name: str | None = Field(None, max_length=100)
    guest_last_name: str | None = Field(None, max_length=100)
    guest_email: str | None = Field(None, max_length=200)
    guest_country_code: str | None = Field(None, max_length=10)
    guest_birth_date: date | None = None
    # Auto-calculated hashed fields
    hashed_first_name: str | None = Field(None, max_length=64)
    hashed_last_name: str | None = Field(None, max_length=64)
    hashed_email: str | None = Field(None, max_length=64)
    hashed_country_code: str | None = Field(None, max_length=64)
    hashed_birth_date: str | None = Field(None, max_length=64)
    # Timestamps
    first_seen: datetime = Field(default_factory=lambda: datetime.now(UTC))
    last_seen: datetime = Field(default_factory=lambda: datetime.now(UTC))

    @staticmethod
    def _normalize_and_hash(value: str | None) -> str | None:
        """Return the SHA256 hex digest of the lowercased, stripped value.

        None, the empty string and whitespace-only strings yield None.
        NOTE(review): meant to mirror ConversionGuest._normalize_and_hash,
        which is not visible from this module - confirm they stay in sync.
        """
        if value in (None, ""):
            return None
        canonical = value.lower().strip()
        if not canonical:
            return None
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    @model_validator(mode="after")
    def calculate_hashes(self) -> "ConversionGuestData":
        """Fill every hashed field that is still None from its plain field."""
        text_sources = (
            ("hashed_first_name", "guest_first_name"),
            ("hashed_last_name", "guest_last_name"),
            ("hashed_email", "guest_email"),
            ("hashed_country_code", "guest_country_code"),
        )
        for hashed_attr, plain_attr in text_sources:
            if getattr(self, hashed_attr) is None:
                plain_value = getattr(self, plain_attr)
                setattr(self, hashed_attr, self._normalize_and_hash(plain_value))
        # The birth date is a date object, so it is hashed via its ISO string.
        if self.hashed_birth_date is None and self.guest_birth_date is not None:
            iso_birth_date = self.guest_birth_date.isoformat()
            self.hashed_birth_date = self._normalize_and_hash(iso_birth_date)
        return self

    @field_validator("guest_id", mode="before")
    @classmethod
    def convert_guest_id_to_int(cls, v: Any) -> int:
        """Convert guest_id to integer (handles string input from XML)."""
        return convert_to_int("guest_id", v)

    model_config = {"from_attributes": True}
class ConversionData(BaseModel):
    """Validated conversion data from PMS XML.

    Handles validation for conversion records extracted from hotel PMS
    conversion XML files. Integer fields accept numeric strings, and string
    fields are trimmed with blank values becoming None, before a Conversion
    database entry is created.
    """

    # Foreign key references (nullable - matched after creation)
    reservation_id: int | None = Field(None, gt=0)
    customer_id: int | None = Field(None, gt=0)
    # Required reservation metadata from PMS
    hotel_id: str = Field(..., min_length=1, max_length=50)
    pms_reservation_id: int = Field(..., gt=0)
    guest_id: int | None = Field(None, gt=0)
    # Optional reservation metadata
    reservation_number: str | None = Field(None, max_length=100)
    reservation_date: date | None = None
    creation_time: datetime | None = None
    reservation_type: str | None = Field(None, max_length=50)
    booking_channel: str | None = Field(None, max_length=100)
    # Advertising/tracking data (used for matching)
    advertising_medium: str | None = Field(None, max_length=200)
    advertising_partner: str | None = Field(None, max_length=200)
    advertising_campagne: str | None = Field(None, max_length=500)
    # Attribution flags
    directly_attributable: bool = Field(default=False)
    guest_matched: bool = Field(default=False)
    # Timestamps (auto-managed)
    created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(UTC))

    @field_validator(
        "pms_reservation_id", "guest_id", "reservation_id", "customer_id",
        mode="before"
    )
    @classmethod
    def convert_int_fields(cls, v: Any, info: ValidationInfo) -> int | None:
        """Convert integer fields from string to int (handles XML input).

        None and blank strings are treated as "not provided" and become
        None; for the required ``pms_reservation_id`` that None is then
        rejected by the field's own type validation.

        Args:
            v: Raw input value.
            info: Validation context; supplies the name of the field being
                validated so error messages identify it.

        Returns:
            The integer value, or None when the input was None or blank.

        Raises:
            ValueError: If the value cannot be converted to an integer.
        """
        if v is None or (isinstance(v, str) and not v.strip()):
            return None
        # field_name can be None outside of field validation; keep the
        # generic label as a fallback.
        return convert_to_int(info.field_name or "field", v)

    @field_validator("hotel_id", "reservation_number", "reservation_type",
                     "booking_channel", "advertising_medium", "advertising_partner",
                     "advertising_campagne", mode="before")
    @classmethod
    def strip_string_fields(cls, v: str | None) -> str | None:
        """Strip whitespace from string fields; blank values become None."""
        if v is None:
            return None
        stripped = str(v).strip()
        return stripped if stripped else None

    model_config = {"from_attributes": True}